figshare_api_v2 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,125 @@
1
+ module Figshare
2
+
3
+ # Figshare Public Articles API
4
+ #
5
+ class PublicArticles < Base
6
+
7
# Requests a list of public articles
#
# @param institute [Boolean] When true, restrict results to our institution (sends @institute_id)
# @param group_id [Integer] Only return this group's articles
# @param published_since [Time] Return results if published after this time
# @param modified_since [Time] Return results if modified after this time
# @param item_type [String] Matches this item_type. See Figshare API docs for list (https://docs.figshare.com/#articles_list)
# @param resource_doi [String] Matches this resource doi
# @param doi [String] Matches this doi
# @param handle [String] Matches this handle
# @param order [String] "published_date" Default, "modified_date", "views", "cites", "shares"
# @param order_direction [String] "desc" Default, "asc"
# @yield [Hash] {id, title, doi, handle, url, published_date}
def list(institute: false, group_id: nil,
         published_since: nil, modified_since: nil,
         item_type: nil, resource_doi: nil, doi: nil, handle: nil,
         order: 'published_date', order_direction: 'desc',
         &block)
  args = {}
  # `institute` defaults to false (not nil), so test its truthiness;
  # a nil check would add the institution filter to every request.
  args['institution'] = @institute_id if institute

  optional = {
    'group_id' => group_id,
    'item_type' => item_type,
    'resource_doi' => resource_doi,
    'doi' => doi,
    'handle' => handle,
    'published_since' => published_since,
    'modified_since' => modified_since,
    'order' => order,
    'order_direction' => order_direction
  }
  # Only send the filters the caller actually supplied.
  optional.each { |key, value| args[key] = value unless value.nil? }

  get_paginate(api_query: 'articles', args: args, &block)
end
39
+
40
# Search within the public articles
#
# @param institute [Boolean] When true, restrict results to our institution (sends @institute_id)
# @param group_id [Integer] Only return this group's articles
# @param published_since [Time] Return results if published after this time
# @param modified_since [Time] Return results if modified after this time
# @param item_type [String] Matches this item_type. See Figshare API docs for list (https://docs.figshare.com/#articles_list)
# @param resource_doi [String] Matches this resource doi
# @param doi [String] Matches this doi
# @param handle [String] Matches this handle
# @param order [String] "published_date" Default, "modified_date", "views", "cites", "shares"
# @param order_direction [String] "desc" Default, "asc"
# @param search_for [String] Search term sent as 'search_for' (required)
# @yield [Hash] {id, title, doi, handle, url, published_date}
def search(institute: false, group_id: nil,
           published_since: nil, modified_since: nil,
           item_type: nil, resource_doi: nil, doi: nil, handle: nil,
           order: 'published_date', order_direction: 'desc',
           search_for:,
           &block)
  args = { 'search_for' => search_for }
  # `institute` defaults to false (not nil), so test its truthiness;
  # a nil check would add the institution filter to every search.
  args['institution'] = @institute_id if institute

  optional = {
    'group_id' => group_id,
    'item_type' => item_type,
    'resource_doi' => resource_doi,
    'doi' => doi,
    'handle' => handle,
    'published_since' => published_since,
    'modified_since' => modified_since,
    'order' => order,
    'order_direction' => order_direction
  }
  # Only send the filters the caller actually supplied.
  optional.each { |key, value| args[key] = value unless value.nil? }

  post(api_query: 'articles/search', args: args, &block)
end
73
+
74
# Return details of a specific article (default version)
#
# @param article_id [Integer] Figshare id of the article
# @yield [Hash] See figshare api docs
def detail(article_id:, &block)
  get(api_query: "articles/#{article_id}", &block)
end
81
+
82
# Return the list of versions for a specific article
#
# @param article_id [Integer] Figshare id of the article
# @yield [Hash] See figshare api docs
def versions(article_id:, &block)
  get(api_query: "articles/#{article_id}/versions", &block)
end
89
+
90
# Return details of a specific article version
#
# @param article_id [Integer] Figshare id of the article
# @param version_id [Integer] Figshare id of the article's version
# @param embargo [Boolean] Query the version's "embargo" sub-resource instead of the version itself
# @param confidentiality [Boolean] Query the version's "confidentiality" sub-resource (ignored when embargo is set)
# @yield [Hash] See figshare api docs
def version_detail(article_id:, version_id:, embargo: false, confidentiality: false, &block)
  path = "articles/#{article_id}/versions/#{version_id}"
  # embargo takes precedence when both flags are given.
  if embargo
    path += '/embargo'
  elsif confidentiality
    path += '/confidentiality'
  end
  get(api_query: path, &block)
end
106
+
107
# Return the list of files for a specific article
#
# @param article_id [Integer] Figshare id of the article
# @yield [Hash] See figshare api docs
def files(article_id:, &block)
  # &block must be declared in the signature to be forwarded;
  # without it the reference to `block` raises NameError.
  get(api_query: "articles/#{article_id}/files", &block)
end
114
+
115
# Return details of a specific file for a specific article
#
# @param article_id [Integer] Figshare id of the article
# @param file_id [Integer] Figshare id of the file
# @yield [Hash] See figshare api docs
def file_detail(article_id:, file_id:, &block)
  # &block must be declared in the signature to be forwarded;
  # without it the reference to `block` raises NameError.
  get(api_query: "articles/#{article_id}/files/#{file_id}", &block)
end
123
+
124
+ end
125
+ end
@@ -0,0 +1,105 @@
1
+ module Figshare
2
+
3
+ # Figshare public collections API calls
4
+ #
5
+ class PublicCollections < Base
6
+
7
# Requests a list of public collections
#
# @param institution [Boolean] When true, restrict results to our institution (sends @institute_id)
# @param group_id [Integer] Only return this group's collections (sent as 'group')
# @param published_since [Time] Return results if published after this time
# @param modified_since [Time] Return results if modified after this time
# @param resource_doi [String] Matches this resource doi
# @param doi [String] Matches this doi
# @param handle [String] Matches this handle
# @param order [String] "published_date" Default, "modified_date", "views", "cites", "shares"
# @param order_direction [String] "desc" Default, "asc"
# @yield [Hash] {id, title, doi, handle, url, published_date}
def list(institution: false, group_id: nil,
         published_since: nil, modified_since: nil,
         resource_doi: nil, doi: nil, handle: nil,
         order: 'published_date', order_direction: 'desc',
         &block)
  args = {}
  # `institution` defaults to false (not nil), so test its truthiness;
  # a nil check would add the institution filter to every request.
  args['institution'] = @institute_id if institution

  optional = {
    'group' => group_id,
    'resource_doi' => resource_doi,
    'doi' => doi,
    'handle' => handle,
    'published_since' => published_since,
    'modified_since' => modified_since,
    'order' => order,
    'order_direction' => order_direction
  }
  # Only send the filters the caller actually supplied.
  optional.each { |key, value| args[key] = value unless value.nil? }

  get_paginate(api_query: 'collections', args: args, &block)
end
37
+
38
# Search within the public collections
#
# @param institute [Boolean] When true, restrict results to our institution (sends @institute_id)
# @param group_id [Integer] Only return this group's collections
# @param published_since [Time] Return results if published after this time
# @param modified_since [Time] Return results if modified after this time
# @param item_type [String] Matches this item_type
# @param resource_doi [String] Matches this resource doi
# @param doi [String] Matches this doi
# @param handle [String] Matches this handle
# @param order [String] "published_date" Default, "modified_date", "views", "cites", "shares"
# @param order_direction [String] "desc" Default, "asc"
# @param search_for [String] Search term sent as 'search_for' (required)
# @yield [Hash] {id, title, doi, handle, url, published_date}
def search(institute: false, group_id: nil,
           published_since: nil, modified_since: nil,
           item_type: nil, resource_doi: nil, doi: nil, handle: nil,
           order: 'published_date', order_direction: 'desc',
           search_for:,
           &block)
  args = { 'search_for' => search_for }
  # `institute` defaults to false (not nil), so test its truthiness;
  # a nil check would add the institution filter to every search.
  args['institution'] = @institute_id if institute

  optional = {
    'group_id' => group_id,
    'item_type' => item_type,
    'resource_doi' => resource_doi,
    'doi' => doi,
    'handle' => handle,
    'published_since' => published_since,
    'modified_since' => modified_since,
    'order' => order,
    'order_direction' => order_direction
  }
  # Only send the filters the caller actually supplied.
  optional.each { |key, value| args[key] = value unless value.nil? }

  # Public collections search endpoint; the previous 'account/articles/search'
  # path searched the account's private articles instead of collections.
  post(api_query: 'collections/search', args: args, &block)
end
70
+
71
# Get details of a specific collection (default version)
#
# @param collection_id [Integer] Figshare id of the collection
# @yield [Hash] See figshare api docs
def detail(collection_id:, &block)
  get(api_query: "collections/#{collection_id}", &block)
end
78
+
79
# Return the list of versions of a public collection
#
# @param collection_id [Integer] Figshare id of the collection
# @yield [Hash] See figshare api docs
def versions(collection_id:, &block)
  get(api_query: "collections/#{collection_id}/versions", &block)
end
86
+
87
# Get details of a specific collection version
#
# @param collection_id [Integer] Figshare id of the collection
# @param version_id [Integer] Figshare id of the collection's version
# @yield [Hash] See figshare api docs
def version_detail(collection_id:, version_id:, &block)
  get(api_query: "collections/#{collection_id}/versions/#{version_id}", &block)
end
95
+
96
# Get the list of articles for a specific collection
#
# @param collection_id [Integer] Figshare id of the collection
# @yield [Hash] {id, title, doi, handle, url, published_date}
def articles(collection_id:, &block)
  get_paginate(api_query: "collections/#{collection_id}/articles", &block)
end
103
+ end #of class
104
+
105
+ end #of module
@@ -0,0 +1,66 @@
1
+ module Figshare
2
+
3
+ # Figshare public projects api
4
+ #
5
+ class PublicProjects < Base
6
# Requests a list of projects
#
# @param institute [Boolean] When true, restrict results to our institution (sends @institute_id)
# @param group_id [Integer] Only return this group's projects (sent as 'group')
# @param published_since [Time] Return results if published after this time
# @param order [String] "published_date" Default, "modified_date", "views", "cites", "shares"
# @param order_direction [String] "desc" Default, "asc"
# @yield [Hash] {url, published_date, id, title}
def list(institute: false, group_id: nil, published_since: nil,
         order: 'published_date', order_direction: 'desc', &block)
  args = {}
  # `institute` defaults to false (not nil), so test its truthiness;
  # a nil check would add the institution filter to every request.
  args['institution'] = @institute_id if institute

  optional = {
    'group' => group_id,
    'published_since' => published_since,
    'order' => order,
    'order_direction' => order_direction
  }
  # Only send the filters the caller actually supplied.
  optional.each { |key, value| args[key] = value unless value.nil? }

  get_paginate(api_query: 'projects', args: args, &block)
end
23
+
24
# Search within all public projects
#
# @param institute [Boolean] When true, restrict results to our institution (sends @institute_id)
# @param group_id [Integer] Only return this group's projects (sent as 'group')
# @param published_since [Time] Return results if published after this time
# @param modified_since [Time] Return results if modified after this time
# @param order [String] "published_date" Default, "modified_date", "views", "cites", "shares"
# @param order_direction [String] "desc" Default, "asc"
# @param search_for [String] Search term sent as 'search_for' (required)
# @yield [Hash] {id, title, doi, handle, url, published_date}
def search(institute: false, group_id: nil,
           published_since: nil, modified_since: nil,
           order: 'published_date', order_direction: 'desc',
           search_for:,
           &block)
  args = { 'search_for' => search_for }
  # `institute` defaults to false (not nil), so test its truthiness;
  # a nil check would add the institution filter to every search.
  args['institution'] = @institute_id if institute

  optional = {
    'group' => group_id,
    'published_since' => published_since,
    'modified_since' => modified_since,
    'order' => order,
    'order_direction' => order_direction
  }
  # Only send the filters the caller actually supplied.
  optional.each { |key, value| args[key] = value unless value.nil? }

  # Public projects search endpoint; 'account/projects/search' is the
  # private (per-account) search and does not belong in the public API class.
  post(api_query: 'projects/search', args: args, &block)
end
48
+
49
# Return details of a specific project
#
# @param project_id [Integer] Figshare id of the project
# @yield [Hash] See figshare api docs
def detail(project_id:, &block)
  get(api_query: "projects/#{project_id}", &block)
end
56
+
57
# Get the list of articles for a specific project
#
# @param project_id [Integer] Figshare id of the project
# @yield [Hash] See Figshare API Doc
def articles(project_id:, &block)
  get_paginate(api_query: "projects/#{project_id}/articles", &block)
end
64
+
65
+ end # of class
66
+ end # of module
@@ -0,0 +1,16 @@
1
+ module Figshare
2
# Figshare stats API (not implemented yet)
#
# Every method below is an empty placeholder: it takes no arguments,
# performs no request and returns nil.
class Stats
  # Placeholder; returns nil.
  def breakdown
  end

  # Placeholder; returns nil.
  def timeline
  end

  # Placeholder; returns nil.
  def tops
  end

  # Placeholder; returns nil.
  def totals
  end

  # Placeholder; returns nil.
  def count
  end
end
16
+ end # module
@@ -0,0 +1,215 @@
1
+ module Figshare
2
+ require 'digest'
3
+ require 'dir_r'
4
+
5
+ # Upload files to figshare
6
+ # Nb. This can sometimes fail, so you need to check the md5 to ensure the file got there
7
+ # It can take a short while for the md5 to be calculated, so upload, wait, then check for a computed_md5.
8
+ # The status will show as "ic_checking", "moving_to_final" then to "available",
9
+ # I have seen it stuck at "moving_to_final", but with the right computed_md5.
10
+ #
11
+ class Upload < PrivateArticles
12
# Number of bytes read per chunk when computing a local file's MD5 (1 MiB).
CHUNK_SIZE = 1_048_576
# State of the upload currently in progress; reset at the start of each upload() call.
attr_accessor :file_info, :upload_query, :upload_host, :upload_parts_detail, :file_id, :article_id, :file_name
# Counters maintained by upload_dir(): files uploaded, and stale remote files replaced.
attr_accessor :new_count, :bad_count
15
+
16
# Calculate a local file's MD5 and size.
# The file is read in CHUNK_SIZE pieces, so it is never held in memory whole.
#
# @param filename [String] Path/name of local file to MD5
# @return [Array(String, Integer)] MD5 as a hex string, size of the file in bytes
def self.get_file_check_data(filename)
  size = File.size(filename)
  digest = Digest::MD5.new
  File.open(filename, 'rb') do |io|
    # IO#read returns nil at end of file, which ends the loop.
    while (chunk = io.read(CHUNK_SIZE))
      digest.update(chunk)
    end
  end
  [digest.hexdigest, size]
end
30
+
31
# Upload one file to the Figshare article.
# Runs the steps in order: create the file record, fetch its upload URL,
# fetch the part layout, send every part, then mark the upload complete.
#
# @param article_id [Integer] Figshare article id
# @param file_name [String] path/file_name to upload
# @param trace [Integer] 0: no output, 1: per file upload message, 2: fuller trace
def upload(article_id:, file_name:, trace: 0)
  @article_id = article_id
  @file_name = file_name
  @trace = trace

  # Clear state left over from a previous upload on this object.
  @file_id = nil
  @file_info = nil
  @upload_query = nil
  @upload_host = nil
  @upload_parts_detail = nil

  initiate_new_upload
  puts "New File_id: #{@file_id}\n\n" if @trace > 1

  get_file_info
  puts "@file_info: #{@file_info.to_j}\n\n" if @trace > 1

  get_upload_parts_details
  puts "@upload_parts_detail: #{@upload_parts_detail.to_j}\n\n" if @trace > 1

  upload_the_parts

  complete_upload
  return unless @trace > 1

  # Re-fetch the file record so the trace shows its state after completion.
  status
  puts "Final Status: #{@file_info.to_j}\n\n"
end
64
+
65
# Upload all files in a directory, into one article.
# Check checksums, and only upload changed or new files
# Does not recurse through sub-directories, as figshare has a flat file structure.
# Sets @new_count (files uploaded) and @bad_count (stale remote files replaced).
#
# @param article_id [Integer] Figshare article id
# @param directory [String] path
# @param delete_extras [Boolean] delete any files in the figshare end, that aren't in the local directory.
# @param exclude_dot_files [Boolean] skip local files whose name starts with "."
# @param trace [Integer] 0: no output, 1: per file upload message, 2: fuller trace
def upload_dir(article_id:, directory:, delete_extras: false, exclude_dot_files: true, trace: 0)
  @new_count = 0
  @bad_count = 0

  seen = {} # local file names encountered during the walk
  cache_article_file_md5(article_id: article_id)

  DirR.walk_dir(directory: directory, walk_sub_directories: false) do |dir, name|
    next if exclude_dot_files && name =~ /^\..*/

    seen[name] = true
    path = "#{dir}/#{name}"
    remote = @md5_cache[name]

    if remote
      md5, = Upload.get_file_check_data(path)
      if remote[:md5] != md5 # file is there, but has changed, or previously failed to upload.
        puts "Deleting: #{article_id} << #{path} #{remote[:id]} MISMATCH '#{remote[:md5]}' != '#{md5}'" if trace > 0
        file_delete(article_id: article_id, file_id: remote[:id])
        @bad_count += 1
        puts "Re-ADDING: #{article_id} << #{path}" if trace > 0
        upload(article_id: article_id, file_name: path, trace: trace)
        @new_count += 1
      elsif trace > 1
        puts "EXISTS: #{article_id} #{path}"
      end
    else
      puts "ADDING: #{article_id} << #{path}" if trace > 0
      upload(article_id: article_id, file_name: path, trace: trace)
      @new_count += 1
    end
  end

  # Report (or delete) files in the Figshare article that weren't in the directory.
  @md5_cache.each do |remote_name, remote|
    next if seen[remote_name]

    if delete_extras
      puts "Deleteing EXTRA: #{article_id} << #{remote_name} #{remote[:id]}" if trace > 0
      # Use this entry's own id; the walk's block variable is out of scope here.
      file_delete(article_id: article_id, file_id: remote[:id])
    elsif trace > 0
      puts "EXTRA: #{article_id} << #{remote_name} #{remote[:id]}"
    end
  end
end
115
+
116
# Retrieve md5 sums of the existing files in the figshare article
# Rebuilds @md5_cache as filename => {article_id:, id:, md5:}, where id and
# md5 come from the remote file record's 'id' and 'computed_md5' fields.
#
# @param article_id [Integer] Figshare article ID
private def cache_article_file_md5(article_id:)
  @md5_cache = {}
  files(article_id: article_id) do |remote_file|
    @md5_cache[remote_file['name']] = {
      :article_id => article_id,
      :id => remote_file['id'],
      :md5 => remote_file['computed_md5']
    }
  end
end
126
+
127
# Get status of the current upload.
# Just fetches the file record from figshare and stores it in @file_info.
# Of interest is the status field, and the computed_md5 field
#
# @return [Hash] Figshare file record
# @raise [RuntimeError] if no file record was yielded by the lookup
def status
  @file_info = nil
  file_detail(article_id: @article_id, file_id: @file_id) { |record| @file_info = record }
  raise "Upload::status(): Failed to get figshare file record" if @file_info.nil?

  # Return the record, as documented, rather than the nil left by the guard above.
  @file_info
end
139
+
140
# Creates a new Figshare file record, in the figshare article, and we get the file_id from the upload URL
# file status == 'created'
#
# @raise [RuntimeError] if the post yields no record, leaving @file_id nil
private def initiate_new_upload
  md5, byte_count = Upload.get_file_check_data(@file_name)
  args = {
    'name' => File.basename(@file_name),
    'md5' => md5,
    'size' => byte_count
  }
  post(api_query: "account/articles/#{@article_id}/files", args: args) do |created|
    # The new file's id is the trailing run of digits of the 'location' URL.
    @file_id = created['location'].gsub(/^.*\/([0-9]+)$/, '\1')
  end
  raise "Upload::initiate_new_upload(): failed to create Figshare file record" if @file_id.nil?
end
154
+
155
# Gets the Figshare file info
# We need the upload URLs to continue: the file record's 'upload_url' is split
# into @upload_host (the host name) and @upload_query (the path from "upload" onwards).
#
private def get_file_info
  status # refreshes @file_info, raising if the record cannot be fetched
  upload_url = @file_info['upload_url']
  @upload_host = upload_url.gsub(/^http.*\/\/(.*)\/upload.*$/, '\1')
  @upload_query = upload_url.gsub(/^http.*\/\/(.*)\/(upload.*)$/, '\2')
  puts "Upload_host: #{@upload_host} URL: #{@upload_query}" if @trace > 1
end
164
+
165
# Completes the upload.
# Figshare then calculates the md5 in the background, which may take a while to complete
# And sometimes the checksum never gets calculated, and is left blank.
#
private def complete_upload
  post(api_query: "account/articles/#{@article_id}/files/#{@file_id}")
  # The trace level lives in @trace; there is no local named `trace` in this method.
  puts "complete_upload" if @trace > 1
end
173
+
174
# Get the upload settings
# Fetches @upload_query from @upload_host and stores the parsed JSON reply
# in @upload_parts_detail (its 'parts' entry is read by upload_the_parts).
#
# @raise [RuntimeError] if the request returned nothing
private def get_upload_parts_details
  @upload_parts_detail = nil
  response = nil
  # NOTE(review): verify_cert: false is passed to https_session, which by its name
  # disables TLS certificate checking -- confirm this is intentional.
  WIKK::WebBrowser.https_session(host: @upload_host, verify_cert: false) do |session|
    response = session.get_page(query: @upload_query,
                                authorization: "token #{@auth_token}")
  end
  raise "get_upload_parts_detail(#{@article_id}) failed to get upload URL" if response.nil?

  @upload_parts_detail = JSON.parse(response)

  puts "Part URL #{@upload_parts_detail['parts']}" if @trace > 1
end
189
+
190
# Upload the file in parts
# Each entry of @upload_parts_detail['parts'] names an inclusive byte range
# (startOffset..endOffset) and a partNo; that range is read and sent via upload_part.
#
private def upload_the_parts
  parts = @upload_parts_detail['parts']
  File.open(@file_name, 'rb') do |source|
    parts.each do |part|
      first_byte = part['startOffset']
      # Position explicitly, so parts need not be listed in order or be contiguous.
      source.seek(first_byte)
      chunk = source.read(part['endOffset'] - first_byte + 1)
      upload_part(buffer: chunk, part: part['partNo'])
    end
  end
end
201
+
202
# Upload just one part
# PUTs the buffer to "<@upload_query>/<part>" on @upload_host.
#
# @param buffer [String] bytes of this part
# @param part [Integer] part number, appended to the upload query path
private def upload_part(buffer:, part:)
  puts "upload_part(#{part})" if @trace > 1
  # NOTE(review): verify_cert: false is passed to https_session, which by its name
  # disables TLS certificate checking -- confirm this is intentional.
  WIKK::WebBrowser.https_session(host: @upload_host, verify_cert: false) do |session|
    session.put_req(query: "#{@upload_query}/#{part}",
                    authorization: "token #{@auth_token}",
                    data: buffer)
  end
end
213
+
214
+ end
215
+ end