epo-ops 0.2.6 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.travis.yml +6 -0
  4. data/README.md +78 -38
  5. data/epo-ops.gemspec +2 -2
  6. data/lib/epo_ops.rb +46 -0
  7. data/lib/epo_ops/client.rb +46 -0
  8. data/lib/epo_ops/error.rb +87 -0
  9. data/lib/epo_ops/factories.rb +9 -0
  10. data/lib/epo_ops/factories/name_and_address_factory.rb +54 -0
  11. data/lib/epo_ops/factories/patent_application_factory.rb +116 -0
  12. data/lib/epo_ops/factories/register_search_result_factory.rb +42 -0
  13. data/lib/epo_ops/ipc_class_hierarchy.rb +146 -0
  14. data/lib/epo_ops/ipc_class_hierarchy_loader.rb +60 -0
  15. data/lib/epo_ops/ipc_class_util.rb +71 -0
  16. data/lib/epo_ops/limits.rb +20 -0
  17. data/lib/epo_ops/logger.rb +15 -0
  18. data/lib/epo_ops/name_and_address.rb +58 -0
  19. data/lib/epo_ops/patent_application.rb +159 -0
  20. data/lib/epo_ops/rate_limit.rb +47 -0
  21. data/lib/epo_ops/register.rb +100 -0
  22. data/lib/epo_ops/register_search_result.rb +40 -0
  23. data/lib/epo_ops/search_query_builder.rb +65 -0
  24. data/lib/epo_ops/token_store.rb +33 -0
  25. data/lib/epo_ops/token_store/redis.rb +45 -0
  26. data/lib/epo_ops/util.rb +52 -0
  27. data/lib/epo_ops/version.rb +3 -0
  28. metadata +26 -20
  29. data/lib/epo/ops.rb +0 -43
  30. data/lib/epo/ops/address.rb +0 -60
  31. data/lib/epo/ops/bibliographic_document.rb +0 -196
  32. data/lib/epo/ops/client.rb +0 -27
  33. data/lib/epo/ops/error.rb +0 -89
  34. data/lib/epo/ops/ipc_class_hierarchy.rb +0 -148
  35. data/lib/epo/ops/ipc_class_hierarchy_loader.rb +0 -62
  36. data/lib/epo/ops/ipc_class_util.rb +0 -73
  37. data/lib/epo/ops/limits.rb +0 -22
  38. data/lib/epo/ops/logger.rb +0 -11
  39. data/lib/epo/ops/rate_limit.rb +0 -49
  40. data/lib/epo/ops/register.rb +0 -152
  41. data/lib/epo/ops/search_query_builder.rb +0 -65
  42. data/lib/epo/ops/token_store.rb +0 -35
  43. data/lib/epo/ops/token_store/redis.rb +0 -47
  44. data/lib/epo/ops/util.rb +0 -32
  45. data/lib/epo/ops/version.rb +0 -6
@@ -0,0 +1,47 @@
1
module EpoOps
  # Wraps the headers of an HTTP response and answers questions about request
  # throttling: whether a request was rejected, why, how much quota has been
  # used and when a retry may be attempted.
  class RateLimit
    # Seconds added to the current time by {#reset_at} for a weekly-quota rejection.
    WEEKLY_QUOTA_RESET_TIME = 604_800
    # Seconds added to the current time by {#reset_at} for an hourly-quota rejection.
    HOURLY_QUOTA_RESET_TIME = 600
    # Seconds added to the current time by {#reset_at} for any other rejection.
    BASE_RESET_TIME = 60

    # @return [Hash] the header hash this object was built from
    attr_reader :attr

    # @param http_header [Hash] response headers keyed by header name
    # @raise [RuntimeError] if +http_header+ is not a Hash
    def initialize(http_header)
      unless http_header.is_a?(Hash)
        fail "Rate Limit data should be a Hash but is #{http_header.inspect} (#{http_header.class.name})"
      end

      @attr = http_header
    end

    # @return [Boolean] true when the `x-rejection-reason` header is present
    def limit_reached?
      @attr.key?('x-rejection-reason')
    end

    # @return [Symbol, nil] +:weekly_quota+, +:hourly_quota+ or
    #   +:unknown_reason+; nil when the header holds no value
    def rejection_reason
      reason = @attr['x-rejection-reason']
      return nil unless reason

      case reason
      when 'RegisteredQuotaPerWeek'
        :weekly_quota
      when 'IndividualQuotaPerHour'
        :hourly_quota
      else
        :unknown_reason
      end
    end

    # @return [Integer, nil] value of `x-individualquotaperhour-used` as an
    #   Integer, nil when the header is absent
    def hourly_quota
      used = @attr['x-individualquotaperhour-used']
      used ? used.to_i : nil
    end

    # @return [Integer, nil] value of `x-registeredquotaperweek-used` as an
    #   Integer, nil when the header is absent
    def weekly_quota
      used = @attr['x-registeredquotaperweek-used']
      used ? used.to_i : nil
    end

    # @return [Integer, nil] Unix timestamp (current time plus the offset that
    #   belongs to the rejection reason); nil when no limit was reached
    def reset_at
      return unless limit_reached?

      delay =
        case rejection_reason
        when :weekly_quota then WEEKLY_QUOTA_RESET_TIME
        when :hourly_quota then HOURLY_QUOTA_RESET_TIME
        else BASE_RESET_TIME
        end

      Time.now.to_i + delay
    end
  end
end
@@ -0,0 +1,100 @@
1
+ require 'epo_ops'
2
+ require 'epo_ops/client'
3
+ require 'epo_ops/util'
4
+ require 'epo_ops/logger'
5
+ require 'epo_ops/ipc_class_util'
6
+
7
module EpoOps
  # Access to the {http://ops.epo.org/3.1/rest-services/register register}
  # endpoint of the EPO OPS API.
  #
  # Currently you can search and retrieve patents by using the type
  # `application` in the `epodoc` format.
  #
  # Search queries are limited in size; not following these limits will result
  # in errors. You should probably use {.search}, which handles the limits
  # itself.
  #
  # For more fine grained control use {.raw_search}.
  #
  # @see Limits
  # @see SearchQueryBuilder
  class Register
    # A helper method which creates queries that take API limits into account.
    #
    # @param ipc_class [String, nil] IPC class passed on to the query builder
    # @param date [Date, nil] date passed on to the query builder
    # @param patent_count [Integer] number of overall results expected.
    #   See {.published_patents_counts}
    # @return [Array<String>] queries to put into {Register.raw_search}; empty
    #   when +patent_count+ is less than 1
    # @see EpoOps::Limits
    def self.split_by_size_limits(ipc_class, date, patent_count)
      max_interval = Limits::MAX_QUERY_INTERVAL
      (1..patent_count).step(max_interval).map do |start|
        # The last window is cut off at the total number of expected results.
        range_end = [start + max_interval - 1, patent_count].min
        EpoOps::SearchQueryBuilder.build(ipc_class, date, start, range_end)
      end
    end

    # Makes the requests to find how many patents are in each top
    # level ipc class on a given date.
    #
    # @param date [Date] date on which patents should be counted
    # @return [Hash] Hash ipc_class => count (ipc_class A-H)
    def self.patent_counts_per_ipc_class(date)
      %w[A B C D E F G H].each_with_object({}) do |icc, counts|
        counts[icc] = published_patents_counts(icc, date)
      end
    end

    # Asks the API for a minimal result window (range 1-2) and reports the
    # `count` of the returned search result.
    #
    # @param ipc_class [String] up to now should only be between A-H
    # @param date [Date]
    # @return [Integer] number of patents with given parameters
    def self.published_patents_counts(ipc_class = nil, date = nil)
      query = SearchQueryBuilder.build(ipc_class, date, 1, 2)
      Register.raw_search(query).count
    end

    # Search method returning all register references on a given date, with
    # optional ipc_class. The patents of every partial query are merged into
    # one flat list; no de-duplication is performed here.
    #
    # @note This method does more than one query; it may happen that you
    #   exceed your API limits
    # @return [RegisterSearchResult] all patents found, with `count` set to
    #   the number of patents collected
    def self.search(ipc_class = nil, date = nil)
      # flat_map: each partial result contributes its patents to ONE list
      # (a plain map would produce an array of arrays, one per query).
      applications = all_queries(ipc_class, date).flat_map do |query|
        raw_search(query).patents
      end

      EpoOps::RegisterSearchResult.new(applications, applications.count)
    end

    # Builds all queries necessary to find all patent references on a given
    # date. When the number of results exceeds Limits::MAX_QUERY_RANGE the
    # search is repeated for every child of the given IPC class.
    #
    # @return [Array<String>] queries applicable to {Register.raw_search}
    def self.all_queries(ipc_class = nil, date = nil)
      count = published_patents_counts(ipc_class, date)
      if count > Limits::MAX_QUERY_RANGE
        IpcClassUtil.children(ipc_class).flat_map { |ic| all_queries(ic, date) }
      else
        split_by_size_limits(ipc_class, date, count)
      end
    end

    # Sends a single search query to the register endpoint.
    #
    # @param query A query built with {EpoOps::SearchQueryBuilder}
    # @param raw only consulted when the API answers with
    #   EpoOps::Error::NotFound: a truthy value makes this method return nil,
    #   otherwise a RegisterSearchResult::NullResult is returned.
    #   NOTE(review): returning the unparsed response hash for `raw = true`
    #   is not implemented in this method.
    # @return [RegisterSearchResult, nil]
    def self.raw_search(query, raw = false)
      data = Client.request(
        :get,
        '/3.1/rest-services/register/search?' + query
      ).parsed

      EpoOps::Factories::RegisterSearchResultFactory.build(data)
    rescue EpoOps::Error::NotFound
      raw ? nil : EpoOps::RegisterSearchResult::NullResult.new
    end
  end
end
@@ -0,0 +1,40 @@
1
module EpoOps
  # A simple wrapper for a register search query result. It is Enumerable
  # over the contained patents.
  class RegisterSearchResult
    include Enumerable

    # The number of patents that match the query string. Offsets and API query
    # limits do not apply, so the actual number of patents returned can be
    # much smaller. This reader takes precedence over Enumerable#count.
    # @see EpoOps::Limits
    # @return [Integer] The number of applications matching the query.
    attr_reader :count

    # @return [Array] the patents returned by the search. Patent application
    #   data is not complete
    attr_reader :patents

    # @param patents [Array] patents contained in this result
    # @param count [Integer] number of patents matching the query
    # @param raw_data the data this result was built from (optional); it is
    #   only stored
    def initialize(patents, count, raw_data = nil)
      @patents = patents
      @count = count
      @raw_data = raw_data
    end

    # Yields every patent of this result.
    #
    # @yieldparam patent one element of {#patents}
    # @return [Enumerator] when called without a block
    # @return whatever `patents.each` returns when a block is given
    def each
      # Without this guard a block-less call would fail with LocalJumpError.
      return to_enum(:each) unless block_given?

      patents.each do |patent|
        yield(patent)
      end
    end

    # Represents queries with no results
    class NullResult < EpoOps::RegisterSearchResult
      # Accepts (and ignores) optional data so it can be built like any
      # other result object.
      def initialize(data = nil); end

      # @return [Integer] always 0
      def count
        0
      end

      # @return [Array] always empty
      def patents
        []
      end
    end
  end
end
@@ -0,0 +1,65 @@
1
+ require 'epo_ops/limits'
2
+ require 'epo_ops/logger'
3
+
4
module EpoOps
  # This Builder helps creating a search query using
  # {https://www.loc.gov/standards/sru/cql/ CQL} (Common Query Language or
  # Contextual Query Language) with the identifiers specified by the EPO in
  # the OPS Documentation chapter 4.2 ({https://www.epo.org/searching-for-patents/technical/espacenet/ops.html Link})
  # - use tab Downloads and see file 'OPS version 3.1 documentation').
  class SearchQueryBuilder
    # Build the query with the given parameters. Invalid ranges are fixed
    # automatically and the changes are logged at debug level.
    #
    # @param ipc_class [String, nil] IPC class to filter by; omitted when nil
    # @param date [#year, #month, #day, nil] publication date; omitted when nil
    # @param range_start [Integer, nil] first result index, defaults to 1
    # @param range_end [Integer, nil] last result index, defaults to 10
    # @return [String]
    def self.build(ipc_class, date, range_start = nil, range_end = nil)
      first, last = validate_range(range_start, range_end)
      "q=#{build_params(ipc_class, date)}&Range=#{first}-#{last}"
    end

    # NOTE: a bare `private` does not apply to methods defined with
    # `def self.`; the helpers below remain callable from outside. The
    # visibility is left untouched so existing callers keep working.
    private

    # Joins the date and class conditions that are present with ' and '.
    def self.build_params(ipc_class, date)
      [build_date(date), build_class(ipc_class)].compact.join(' and ')
    end

    # @return [String, nil] `pd=YYYYMMDD`, nil when no date is given
    def self.build_date(date)
      return unless date

      format('pd=%04d%02d%02d', date.year, date.month, date.day)
    end

    # @return [String, nil] `ic=<class>`, nil when no class is given
    def self.build_class(ipc_class)
      "ic=#{ipc_class}" if ipc_class
    end

    # Fixes the range given so that it meets the EPO API's rules. The range
    # may only be Limits::MAX_QUERY_INTERVAL elements long, the maximum
    # allowed value is Limits::MAX_QUERY_RANGE.
    # If the given window is out of range, it will be moved preserving the
    # distance covered.
    # @see EpoOps::Limits
    # @return array with two elements: [range_start, range_end]
    def self.validate_range(range_start, range_end)
      range_start = 1 unless range_start
      range_end = 10 unless range_end

      if range_start > range_end
        range_start, range_end = range_end, range_start
        Logger.debug('range_start was bigger than range_end, swapped values')
      end

      # Checked independently of the swap above: a reversed range can be too
      # wide as well and must be cut down to the allowed interval.
      if range_end - range_start > Limits::MAX_QUERY_INTERVAL - 1
        range_end = range_start + Limits::MAX_QUERY_INTERVAL - 1
        Logger.debug("range invalid, set to: #{[range_start, range_end]}")
      end

      if range_start < 1
        # Shift the window up so that it starts at 1, keeping its width.
        range_end = range_end - range_start + 1
        range_start = 1
        Logger.debug("range_start must be > 0, set to: #{[range_start, range_end]}")
      elsif range_end > Limits::MAX_QUERY_RANGE
        # Shift the window down so that it ends at the maximum, keeping its width.
        range_start = Limits::MAX_QUERY_RANGE - (range_end - range_start)
        range_end = Limits::MAX_QUERY_RANGE
        Logger.debug("range_end was too big, set to: #{[range_start, range_end]}")
      end

      [range_start, range_end]
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'oauth2'
2
+ require 'epo_ops'
3
+
4
module EpoOps
  # Keeps the OAuth access token in memory. Subclass it and override #token
  # if the token should be stored somewhere else.
  class TokenStore
    # Returns the stored token, requesting a new one first when none is
    # stored yet or the stored one reports itself as expired.
    #
    # @return the current access token
    def token
      @token = generate_token unless @token && !@token.expired?
      @token
    end

    # Forgets the stored token, so the next call to #token requests a new one.
    #
    # @return [nil]
    def reset
      @token = nil
    end

    protected

    # Requests a token from the OPS auth endpoint using the client
    # credentials flow and the consumer key/secret found in EpoOps.config.
    def generate_token
      options = {
        site: 'https://ops.epo.org/',
        token_url: '/3.1/auth/accesstoken',
        raise_errors: false
      }
      oauth_client = OAuth2::Client.new(
        EpoOps.config.consumer_key,
        EpoOps.config.consumer_secret,
        **options
      )

      oauth_client.client_credentials.get_token
    end
  end
end
+ end
@@ -0,0 +1,45 @@
1
+ require 'redis'
2
+ require 'connection_pool'
3
+
4
require 'digest'

module EpoOps
  class TokenStore
    # Token store that keeps the access token string in Redis, under a key
    # derived from the configured consumer key and secret.
    class Redis < TokenStore
      # @param redis_host [String] host passed on to ::Redis.new
      # @raise [RuntimeError] if ::Redis or ConnectionPool is not defined
      def initialize(redis_host)
        unless defined?(::Redis) && defined?(ConnectionPool)
          raise "Please install gems 'redis' and 'connection_pool' to use this feature"
        end

        @redis = ConnectionPool.new(size: 5, timeout: 5) { ::Redis.new(host: redis_host) }
      end

      # Returns a token built from the cached token string, or requests (and
      # caches) a new one when nothing usable is stored.
      #
      # @return the access token
      def token
        cached = nil
        @redis.with do |conn|
          cached = conn.get("epo_token_#{id}")
        end

        # Plain-Ruby blank check; String#present? is ActiveSupport-only.
        return generate_token if cached.nil? || cached.to_s.strip.empty?

        OAuth2::AccessToken.new(oauth_client, cached)
      end

      # Removes the cached token string from Redis.
      def reset
        @redis.with do |conn|
          conn.del("epo_token_#{id}")
        end
      end

      private

      # @return [String] MD5 hex digest of consumer key + secret, used to
      #   build the Redis key
      def id
        Digest::MD5.hexdigest(EpoOps.config.consumer_key + EpoOps.config.consumer_secret)
      end

      # Client needed to rebuild an access token from its cached string.
      # Uses the same settings TokenStore#generate_token passes to OAuth2.
      def oauth_client
        OAuth2::Client.new(
          EpoOps.config.consumer_key,
          EpoOps.config.consumer_secret,
          site: 'https://ops.epo.org/',
          token_url: '/3.1/auth/accesstoken',
          raise_errors: false
        )
      end

      # Requests a new token via the superclass and stores its string in
      # Redis with the token's lifetime as expiry; `nx: true` leaves an
      # already existing key untouched.
      def generate_token
        token = super

        @redis.with do |conn|
          conn.set("epo_token_#{id}", token.token, ex: token.expires_in, nx: true)
        end

        token
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
require 'date'

module EpoOps
  # Helpers for picking values out of nested Hash/Array data.
  class Util
    # Follows +path+ through the nested data, collecting every match.
    #
    # @param epo_hash nested data (hashes, arrays, strings)
    # @param path [Array<String>] the keys to follow, outermost first
    # @return [Array] all values found at the end of the path
    def self.find_in_data(epo_hash, path)
      path.reduce(epo_hash) { |res, c| parse_hash_flat(res, c) }
    end

    # Collects the values stored under +target+ in +hash_layer+. A layer that
    # holds a truthy value under +target+ contributes that value; otherwise
    # the search continues in the layer's values. Arrays are searched element
    # by element.
    #
    # @param hash_layer the data to search (Hash, Array, String, nil, ...)
    # @param target the key to look for
    # @return [Array] flattened list of matches; empty for nil, strings and
    #   any other value that cannot be converted with #to_h
    def self.parse_hash_flat(hash_layer, target)
      case hash_layer
      when nil, String
        []
      when Array
        hash_layer.map { |element| parse_hash_flat(element, target) }.flatten
      else
        # Scalars such as Integer or true cannot contain the target; without
        # this guard indexing them with the target would raise.
        return [] unless hash_layer.respond_to?(:to_h)

        layer = hash_layer.to_h
        found = layer[target]
        matches = found ? [found] : layer.map { |_key, value| parse_hash_flat(value, target) }
        matches.flatten
      end
    end

    # Walks +path+ through nested hashes.
    #
    # @param data [Hash] nested data
    # @param path keys to follow; nested arrays of keys are flattened
    # @return the value at the end of the path, or nil as soon as a step
    #   does not lead into a Hash
    def self.dig(data, *path)
      path.flatten.inject(data) do |current, key|
        current.is_a?(Hash) ? current[key] : nil
      end
    end

    # Like {.dig}, but arrays met along the way are searched element by
    # element (non-Hash elements are skipped) and the result is always a list.
    #
    # @param data [Hash, Array] nested data
    # @param path keys to follow; nested arrays of keys are flattened
    # @return [Array] the values found, without nils
    def self.flat_dig(data, *path)
      found = path.flatten.inject(data) do |current, key|
        if current.is_a? Hash
          current[key].is_a?(Array) ? current[key] : [current[key]]
        elsif current.is_a? Array
          current.select { |element| element.is_a? Hash }.flat_map { |element| element[key] }
        else
          []
        end
      end
      found.reject(&:nil?)
    end

    # Converts a string of the form `YYYY/WW` into the date Date.commercial
    # returns for that year and calendar week.
    #
    # @param num [String, nil] e.g. `"2016/19"`
    # @return [Date, nil] nil when +num+ does not have the expected format
    def self.parse_change_gazette_num(num)
      res = /^(?<year>\d{4})\/(?<week>\d{2})$/.match(num)
      return nil if res.nil?

      # Base 10 is given explicitly so a leading zero ("05") is not read as octal.
      Date.commercial(Integer(res[:year], 10), Integer(res[:week], 10))
    end
  end
end
@@ -0,0 +1,3 @@
1
module EpoOps
  # Version of the epo-ops gem; the string is frozen.
  VERSION = '0.3.0'.freeze
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: epo-ops
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Max Kießling
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2017-07-04 00:00:00.000000000 Z
13
+ date: 2016-05-10 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: bundler
@@ -174,28 +174,34 @@ extensions: []
174
174
  extra_rdoc_files: []
175
175
  files:
176
176
  - ".gitignore"
177
+ - ".travis.yml"
177
178
  - Gemfile
178
179
  - LICENSE
179
180
  - README.md
180
181
  - Rakefile
181
182
  - epo-ops.gemspec
182
- - lib/epo/ops.rb
183
- - lib/epo/ops/address.rb
184
- - lib/epo/ops/bibliographic_document.rb
185
- - lib/epo/ops/client.rb
186
- - lib/epo/ops/error.rb
187
- - lib/epo/ops/ipc_class_hierarchy.rb
188
- - lib/epo/ops/ipc_class_hierarchy_loader.rb
189
- - lib/epo/ops/ipc_class_util.rb
190
- - lib/epo/ops/limits.rb
191
- - lib/epo/ops/logger.rb
192
- - lib/epo/ops/rate_limit.rb
193
- - lib/epo/ops/register.rb
194
- - lib/epo/ops/search_query_builder.rb
195
- - lib/epo/ops/token_store.rb
196
- - lib/epo/ops/token_store/redis.rb
197
- - lib/epo/ops/util.rb
198
- - lib/epo/ops/version.rb
183
+ - lib/epo_ops.rb
184
+ - lib/epo_ops/client.rb
185
+ - lib/epo_ops/error.rb
186
+ - lib/epo_ops/factories.rb
187
+ - lib/epo_ops/factories/name_and_address_factory.rb
188
+ - lib/epo_ops/factories/patent_application_factory.rb
189
+ - lib/epo_ops/factories/register_search_result_factory.rb
190
+ - lib/epo_ops/ipc_class_hierarchy.rb
191
+ - lib/epo_ops/ipc_class_hierarchy_loader.rb
192
+ - lib/epo_ops/ipc_class_util.rb
193
+ - lib/epo_ops/limits.rb
194
+ - lib/epo_ops/logger.rb
195
+ - lib/epo_ops/name_and_address.rb
196
+ - lib/epo_ops/patent_application.rb
197
+ - lib/epo_ops/rate_limit.rb
198
+ - lib/epo_ops/register.rb
199
+ - lib/epo_ops/register_search_result.rb
200
+ - lib/epo_ops/search_query_builder.rb
201
+ - lib/epo_ops/token_store.rb
202
+ - lib/epo_ops/token_store/redis.rb
203
+ - lib/epo_ops/util.rb
204
+ - lib/epo_ops/version.rb
199
205
  homepage: https://github.com/FHG-IMW/epo-ops
200
206
  licenses: []
201
207
  metadata: {}
@@ -215,7 +221,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
215
221
  version: '0'
216
222
  requirements: []
217
223
  rubyforge_project:
218
- rubygems_version: 2.5.2
224
+ rubygems_version: 2.4.8
219
225
  signing_key:
220
226
  specification_version: 4
221
227
  summary: Ruby interface to the European Patent Office API (OPS)