epo-ops 0.2.6 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.travis.yml +6 -0
  4. data/README.md +78 -38
  5. data/epo-ops.gemspec +2 -2
  6. data/lib/epo_ops.rb +46 -0
  7. data/lib/epo_ops/client.rb +46 -0
  8. data/lib/epo_ops/error.rb +87 -0
  9. data/lib/epo_ops/factories.rb +9 -0
  10. data/lib/epo_ops/factories/name_and_address_factory.rb +54 -0
  11. data/lib/epo_ops/factories/patent_application_factory.rb +116 -0
  12. data/lib/epo_ops/factories/register_search_result_factory.rb +42 -0
  13. data/lib/epo_ops/ipc_class_hierarchy.rb +146 -0
  14. data/lib/epo_ops/ipc_class_hierarchy_loader.rb +60 -0
  15. data/lib/epo_ops/ipc_class_util.rb +71 -0
  16. data/lib/epo_ops/limits.rb +20 -0
  17. data/lib/epo_ops/logger.rb +15 -0
  18. data/lib/epo_ops/name_and_address.rb +58 -0
  19. data/lib/epo_ops/patent_application.rb +159 -0
  20. data/lib/epo_ops/rate_limit.rb +47 -0
  21. data/lib/epo_ops/register.rb +100 -0
  22. data/lib/epo_ops/register_search_result.rb +40 -0
  23. data/lib/epo_ops/search_query_builder.rb +65 -0
  24. data/lib/epo_ops/token_store.rb +33 -0
  25. data/lib/epo_ops/token_store/redis.rb +45 -0
  26. data/lib/epo_ops/util.rb +52 -0
  27. data/lib/epo_ops/version.rb +3 -0
  28. metadata +26 -20
  29. data/lib/epo/ops.rb +0 -43
  30. data/lib/epo/ops/address.rb +0 -60
  31. data/lib/epo/ops/bibliographic_document.rb +0 -196
  32. data/lib/epo/ops/client.rb +0 -27
  33. data/lib/epo/ops/error.rb +0 -89
  34. data/lib/epo/ops/ipc_class_hierarchy.rb +0 -148
  35. data/lib/epo/ops/ipc_class_hierarchy_loader.rb +0 -62
  36. data/lib/epo/ops/ipc_class_util.rb +0 -73
  37. data/lib/epo/ops/limits.rb +0 -22
  38. data/lib/epo/ops/logger.rb +0 -11
  39. data/lib/epo/ops/rate_limit.rb +0 -49
  40. data/lib/epo/ops/register.rb +0 -152
  41. data/lib/epo/ops/search_query_builder.rb +0 -65
  42. data/lib/epo/ops/token_store.rb +0 -35
  43. data/lib/epo/ops/token_store/redis.rb +0 -47
  44. data/lib/epo/ops/util.rb +0 -32
  45. data/lib/epo/ops/version.rb +0 -6
@@ -1,62 +0,0 @@
1
- require 'httparty'
2
- require 'epo/ops/ipc_class_util'
3
-
4
module Epo
  module Ops
    # Usually this should only be used internally.
    # Loads the hierarchy from the WIPO.
    # This is used to update IpcClassHierarchy manually.
    # At the beginning of the year the WIPO publishes a new list of IPC classes.
    # The IpcClassHierarchy should then be updated. Make sure that the url is
    # correct!
    class IpcClassHierarchyLoader
      # Loads data from the WIPO.
      # @return [Hash] maps an ipc class to the list of classes one level below
      def self.load
        load_url
      end

      # NOTE: the helpers below used to follow a bare `private`, which has no
      # effect on methods defined with `def self.`. They therefore stay
      # publicly callable (as they always were), but are meant for internal use.

      # Downloads the title list of every section (A-H) and merges the parsed
      # results into one Hash.
      # @return [Hash]
      def self.load_url
        url = 'http://www.wipo.int/ipc/itos4ipc/ITSupport_and_download_area/20160101/IPC_scheme_title_list/EN_ipc_section_#letter_title_list_20160101.txt'
        # The proxy settings do not change between requests, read them once.
        proxy_settings = proxy

        # There is a file for every letter A-H
        ('A'..'H').each_with_object({}) do |letter, hierarchy|
          # Fetch the file from the server
          response = HTTParty.get(url.gsub('#letter', letter),
                                  http_proxyaddr: proxy_settings[:addr],
                                  http_proxyport: proxy_settings[:port])
          hierarchy.merge!(process_file(response.body))
        end
      end

      # Parses one title list. There is a line for every class entry; name and
      # description are separated by a \t. Only entries of length 3 (e.g.
      # 'A01') and 4 (e.g. 'A01B') are recorded, each under its parent class.
      # @param [String] file content of one title list
      # @return [Hash] parent class => Array of child classes, without
      #   duplicates. Unknown keys default to an empty Array.
      def self.process_file(file)
        # each_with_object keeps the accumulator even when a line is skipped;
        # a `next` inside inject would hand nil to the following iteration.
        file.each_line.each_with_object(Hash.new { |h, k| h[k] = [] }) do |line, mem|
          next if line.to_s.strip.empty?

          # Strip so that a line without a tab does not keep its newline.
          ipc_class_generic = line.split("\t").first.to_s.strip
          next if ipc_class_generic.empty?

          ipc_class = Epo::Ops::IpcClassUtil.parse_generic_format(ipc_class_generic)
          parent = case ipc_class.length
                   when 3 then ipc_class[0]
                   when 4 then ipc_class[0, 3]
                   end
          next unless parent

          # Some entries in the files have the same ipc class (a headline
          # followed by the description), record such a class only once.
          mem[parent] << ipc_class unless mem[parent].include?(ipc_class)
        end
      end

      # Reads the proxy configuration from the `http_proxy` environment
      # variable.
      # @return [Hash] with the keys :addr and :port, both nil if no proxy is
      #   configured. The port is returned as a String.
      def self.proxy
        setting = ENV['http_proxy'].to_s.strip
        return { addr: nil, port: nil } if setting.empty?

        proxy_addr, proxy_port = setting.gsub('http://', '').gsub('/', '').split(':')
        { addr: proxy_addr, port: proxy_port }
      end
    end
  end
end
@@ -1,73 +0,0 @@
1
- require 'epo/ops/ipc_class_hierarchy'
2
-
3
module Epo
  module Ops
    # Utility functions to work on Strings representing ipc classes.
    class IpcClassUtil
      # An ipc class in invalid format was given, or none at all.
      class InvalidIpcClassError < StandardError; end
      # It is currently not supported to split by the most specific class level.
      # This would result in a large amount of requests.
      class LevelNotSupportedError < StandardError; end

      # @return [Array] \['A', 'B', …, 'H'\]
      def self.main_classes
        %w( A B C D E F G H )
      end

      # Check if the given ipc_class is valid as OPS search parameter.
      # @param [String] ipc_class an ipc class
      # @return [Boolean] true or false (never a MatchData)
      def self.valid_for_search?(ipc_class)
        !ipc_class.match(/\A[A-H](\d{2}([A-Z](\d{1,2}\/\d{2,3})?)?)?\z/).nil?
      end

      # There is a generic format for ipc classes that does not have
      # the / as delimiter and leaves space for additions. This parses
      # it into the format the register search understands.
      # Strings of up to 4 characters are returned unchanged.
      # @param [String] generic ipc class in generic format
      # @return [String] reformatted ipc class
      # @raise [InvalidIpcClassError] if a String longer than 4 characters does
      #   not end in the generic format (this used to surface as a
      #   NoMethodError on nil)
      # @example
      #   parse_generic_format('A01B0003140000') #=> 'A01B3/14'
      def self.parse_generic_format(generic)
        return generic unless generic.length > 4

        match = generic.match(/([A-Z]\d{2}[A-Z])(\d{4})(\d{6})$/)
        fail InvalidIpcClassError, generic if match.nil?

        # to_i drops the leading zeros of the main group ('0003' -> 3)
        "#{match[1]}#{match[2].to_i}/#{process_number(match[3])}"
      end

      # @param [String] ipc_class an ipc_class
      # @return [Array] List of all ipc classes one level more specific.
      # @example
      #   children('A')   #=> ['A01', 'A21', 'A22', 'A23', ...]
      #   children('A62') #=> ['A62B', 'A62C', 'A62D'],
      # @raise [InvalidIpcClassError] if parameter is not a valid ipc class in
      #   the format EPO understands
      # @raise [LevelNotSupportedError] for parameters with ipc class depth >= 3
      #   e.g. 'A62B' cannot be split further. It is currently not necessary to
      #   do so, it would only blow up the gem, and you do not want to query for
      #   all classes at the lowest level, as it takes too many requests.
      def self.children(ipc_class)
        return main_classes if ipc_class.nil?
        fail InvalidIpcClassError, ipc_class unless valid_for_search?(ipc_class)

        map = IpcClassHierarchy::Hierarchy
        fail LevelNotSupportedError, ipc_class unless map.key? ipc_class
        map[ipc_class]
      end

      # Internal helper (it used to follow a bare `private`, which has no effect
      # on `def self.` methods, so it stays callable as before).
      # Removes the trailing zeros of the subgroup part and pads the rest to at
      # least two characters.
      # @param [String] number the six digit subgroup part
      # @return [String] e.g. '140000' => '14', '100000' => '10', '000000' => '00'
      def self.process_number(number)
        trimmed = number.gsub(/0+$/, '')
        return '00' if trimmed.empty?

        trimmed.length == 1 ? "#{trimmed}0" : trimmed
      end
    end
  end
end
@@ -1,22 +0,0 @@
1
module Epo
  module Ops
    # Size limits of the register search. A single query may only cover
    # {Epo::Ops::Limits::MAX_QUERY_INTERVAL} references, so larger result sets
    # have to be requested window by window. The highest position a window may
    # reach is {Epo::Ops::Limits::MAX_QUERY_RANGE}; to retrieve references
    # beyond that you must split your requests by other parameters.
    # @see Register
    class Limits
      # @return [Integer] The maximum number of elements you may search with one
      #   query. Ignoring this will result in errors.
      MAX_QUERY_INTERVAL = 100

      # @return [Integer] The upper bound of the searchable range. You cannot
      #   request all patents of a given class at once, you probably must split
      #   your requests by additional conditions.
      MAX_QUERY_RANGE = 2000
    end
  end
end
@@ -1,11 +0,0 @@
1
module Epo
  module Ops
    # Simple logger writing some notifications to standard output.
    class Logger
      class << self
        # Writes the given value to standard output, exactly like puts does.
        # @param output the value to print
        # @return [nil]
        def log(output)
          $stdout.puts(output)
        end
      end
    end
  end
end
@@ -1,49 +0,0 @@
1
module Epo
  module Ops
    # Gives access to the rate limit information found in a Hash of HTTP
    # response headers.
    class RateLimit
      # Offsets that {#reset_at} adds to the current Unix timestamp.
      WEEKLY_QUOTA_RESET_TIME = 604_800
      HOURLY_QUOTA_RESET_TIME = 600
      BASE_RESET_TIME = 60

      # @return [Hash] the headers this object was created with
      attr_reader :attr

      # @param http_header [Hash] the response headers
      # @raise [RuntimeError] if anything but a Hash is given
      def initialize(http_header)
        unless http_header.is_a?(Hash)
          raise "Rate Limit data should be a Hash but is #{http_header.inspect} (#{http_header.class.name})"
        end

        @attr = http_header
      end

      # @return [Boolean] whether the headers contain a rejection reason
      def limit_reached?
        @attr.key?('x-rejection-reason')
      end

      # @return [Symbol, nil] :weekly_quota, :hourly_quota or :unknown_reason;
      #   nil if no rejection reason is set
      def rejection_reason
        reason = @attr['x-rejection-reason']
        return nil unless reason

        known_reasons = {
          'RegisteredQuotaPerWeek' => :weekly_quota,
          'IndividualQuotaPerHour' => :hourly_quota
        }
        known_reasons.fetch(reason, :unknown_reason)
      end

      # @return [Integer, nil] value of the hourly usage header, if present
      def hourly_quota
        used_quota('x-individualquotaperhour-used')
      end

      # @return [Integer, nil] value of the weekly usage header, if present
      def weekly_quota
        used_quota('x-registeredquotaperweek-used')
      end

      # @return [Integer, nil] Unix timestamp built from the current time plus
      #   the offset belonging to the rejection reason; nil if no limit was
      #   reached
      def reset_at
        return unless limit_reached?

        delay = case rejection_reason
                when :weekly_quota then WEEKLY_QUOTA_RESET_TIME
                when :hourly_quota then HOURLY_QUOTA_RESET_TIME
                else BASE_RESET_TIME
                end
        Time.now.to_i + delay
      end

      private

      # Reads one header and converts it to an Integer when it is set.
      def used_quota(header_name)
        value = @attr[header_name]
        value.to_i if value
      end
    end
  end
end
@@ -1,152 +0,0 @@
1
- require 'epo/ops'
2
- require 'epo/ops/client'
3
- require 'epo/ops/util'
4
- require 'epo/ops/bibliographic_document'
5
- require 'epo/ops/logger'
6
- require 'epo/ops/ipc_class_util'
7
-
8
module Epo
  module Ops
    # Access to the {http://ops.epo.org/3.2/rest-services/register register}
    # endpoint of the EPO OPS API.
    #
    # By now you can search and retrieve patents by using the type `application`
    # in the `epodoc` format.
    #
    # Search queries are limited by size, not following these limits
    # will result in errors. You should probably use {.search} which handles the
    # limits itself.
    #
    # For more fine grained control use {.raw_search} and {.raw_biblio}
    #
    # @see Limits
    # @see SearchQueryBuilder
    class Register
      # Identifies a document by country, number and (optionally) date.
      Reference = Struct.new(:country, :doc_number, :date) do
        # @return [String] country and doc_number concatenated
        def epodoc_reference
          country + doc_number
        end
      end

      # One entry of a register search result.
      SearchEntry = Struct.new(:publication_reference, :application_reference, :ipc_classes)

      # A helper method which creates queries that take API limits into account.
      # @param ipc_class [String, nil] handed to the query builder unchanged
      # @param date [Date, nil] handed to the query builder unchanged
      # @param patent_count [Integer] number of overall results expected.
      #   See {.published_patents_counts}
      #
      # @return [Array] of Strings, each a query to put into {Register.raw_search}
      # @see Epo::Ops::Limits
      def self.split_by_size_limits(ipc_class, date, patent_count)
        max_interval = Limits::MAX_QUERY_INTERVAL
        (1..patent_count).step(max_interval).map do |start|
          # the last window may be shorter than max_interval
          range_end = [start + max_interval - 1, patent_count].min
          SearchQueryBuilder.build(ipc_class, date, start, range_end)
        end
      end

      # Makes the requests to find how many patents are in each top
      # level ipc class on a given date.
      #
      # @param date [Date] date on which patents should be counted
      # @return [Hash] Hash ipc_class => count (one entry per class returned by
      #   {IpcClassUtil.main_classes}, i.e. A-H)
      def self.patent_counts_per_ipc_class(date)
        # Use the shared list of top level classes instead of repeating it here.
        IpcClassUtil.main_classes.each_with_object({}) do |icc, counts|
          counts[icc] = published_patents_counts(icc, date)
        end
      end

      # @param date [Date]
      # @param ipc_class [String] up to now should only be between A-H
      # @return [Integer] number of patents with given parameters, 0 if the
      #   search returned an empty result
      def self.published_patents_counts(ipc_class = nil, date = nil)
        # The smallest window is enough, only the total count is of interest.
        query = SearchQueryBuilder.build(ipc_class, date, 1, 2)
        minimum_result_set = Register.raw_search(query, true)
        return 0 if minimum_result_set.empty?
        minimum_result_set['world_patent_data']['register_search']['total_result_count'].to_i
      end

      # Search method returning all unique register references on a given
      # date, with optional ipc_class.
      # @note This method does more than one query; it may happen that you
      #   exceed your API limits
      # @return [Array] Array of {SearchEntry}
      def self.search(ipc_class = nil, date = nil)
        queries = all_queries(ipc_class, date)
        search_entries = queries.flat_map { |query| raw_search(query) }
        search_entries.uniq { |se| se.application_reference.epodoc_reference }
      end

      # Builds all queries necessary to find all patent references on a given
      # date. If there are more results than {Limits::MAX_QUERY_RANGE}, the
      # queries are built for every child class of ipc_class instead.
      # @return [Array] Array of Strings containing queries applicable to
      #   {Register.raw_search}.
      def self.all_queries(ipc_class = nil, date = nil)
        count = published_patents_counts(ipc_class, date)
        if count > Limits::MAX_QUERY_RANGE
          IpcClassUtil.children(ipc_class).flat_map { |ic| all_queries(ic, date) }
        else
          split_by_size_limits(ipc_class, date, count)
        end
      end

      # @param query A query built with {Epo::Ops::SearchQueryBuilder}
      # @param raw if `true` the result will be the raw response as a nested
      #   hash. if false(default) the result will be parsed further, returning a
      #   list of [SearchEntry]
      # @return [Array, Hash] Array of {SearchEntry}, or the raw Hash. An empty
      #   Array is returned in both modes if the API answers with NotFound.
      def self.raw_search(query, raw = false)
        hash = Client.request(:get, register_api_string + 'search?' + query).parsed
        return parse_search_results(hash) unless raw
        hash
      rescue Epo::Ops::Error::NotFound
        []
      end

      # @param search_entry [SearchEntry] a search entry which should be
      #   retrieved.
      # @return [BibliographicDocument] a parsed document.
      def self.biblio(search_entry)
        raw_biblio(search_entry.application_reference.epodoc_reference)
      end

      # @param reference_id [String] identifier for document. Format similar to
      #   EP1000000
      # @param type [String] may be `application` or `publication` make sure
      #   that the `reference_id` is matching
      # @param format [String] epodoc is a format defined by the EPO for a
      #   document id. see their documentation.
      # @param raw [Boolean] flag if the result should be returned as a raw Hash
      #   or parsed as {BibliographicDocument}
      # @return [BibliographicDocument, Hash]
      def self.raw_biblio(reference_id, type = 'application', format = 'epodoc', raw = false)
        request = "#{register_api_string}#{type}/#{format}/#{reference_id}/biblio"
        result = Client.request(:get, request).parsed
        raw ? result : BibliographicDocument.new(result)
      end

      # NOTE: the two helpers below used to follow a bare `private`, which has
      # no effect on `def self.` methods. They stay callable as before, but are
      # meant for internal use.

      # Converts the raw search response into {SearchEntry} objects.
      # @param result [Hash] the parsed response of a register search
      # @return [Array] Array of {SearchEntry}
      def self.parse_search_results(result)
        path = %w(world_patent_data register_search register_documents register_document bibliographic_data)

        Util.find_in_data(result, path).map do |entry|
          publication = entry['publication_reference']['document_id']
          application = entry['application_reference']['document_id']
          publication_reference = Reference.new(publication['country'], publication['doc_number'], publication['date'])
          application_reference = Reference.new(application['country'], application['doc_number'])
          # NOTE(review): this raises if an entry has no 'classifications_ipcr'
          # or holds something other than one Hash with a 'text' key - confirm
          # against real responses whether that can happen.
          ipc_classes = entry['classifications_ipcr']['classification_ipcr']['text'].split(';;').map(&:strip)
          SearchEntry.new(publication_reference, application_reference, ipc_classes)
        end
      end

      # @return [String] path prefix of the register service
      def self.register_api_string
        "/#{Epo::Ops::API_VERSION}/rest-services/register/"
      end
    end
  end
end
@@ -1,65 +0,0 @@
1
- require 'epo/ops/limits'
2
- require 'epo/ops/logger'
3
-
4
module Epo
  module Ops
    # This Builder helps creating a search query using
    # {https://www.loc.gov/standards/sru/cql/ CQL} (Common Query Language or
    # Contextual Query Language) with the identifiers specified by the EPO in
    # the OPS Documentation chapter 4.2 ({https://www.epo.org/searching-for-patents/technical/espacenet/ops.html Link})
    # - use tab Downloads and see file 'OPS version 3.1 documentation').
    class SearchQueryBuilder
      # Build the query with the given parameters. Invalid ranges are fixed
      # automatically and you will be notified about the changes.
      # @param ipc_class [String, nil] adds an `ic=` condition if given
      # @param date [#year, #month, #day, nil] adds a `pd=` condition if given
      # @param range_start [Integer] first position of the result window
      # @param range_end [Integer, nil] last position of the result window. If
      #   omitted, the largest window allowed from range_start is used.
      # @return [String]
      def self.build(ipc_class, date, range_start = 1, range_end = nil)
        first, last = validate_range(range_start, range_end)
        "q=#{build_params(ipc_class, date)}&Range=#{first}-#{last}"
      end

      # NOTE: the helpers below used to follow a bare `private`, which has no
      # effect on `def self.` methods. They stay callable as before, but are
      # meant for internal use.

      # Joins the conditions that are present with ' and '.
      # @return [String]
      def self.build_params(ipc_class, date)
        [build_date(date), build_class(ipc_class)].compact.join(' and ')
      end

      # @return [String, nil] `pd=YYYYMMDD`, nil without a date
      def self.build_date(date)
        return unless date

        format('pd=%04d%02d%02d', date.year, date.month, date.day)
      end

      # @return [String, nil] `ic=<ipc_class>`, nil without a class
      def self.build_class(ipc_class)
        "ic=#{ipc_class}" if ipc_class
      end

      # Fixes the range given so that it meets the EPO APIs rules. The range
      # may only be 100 elements long, the maximum allowed value is 2000.
      # If the given window is out of range, it will be moved preserving the
      # distance covered.
      # The checks run one after another, so a range that had to be swapped is
      # still checked for its length.
      # @see Epo::Ops::Limits
      # @param range_start [Integer]
      # @param range_end [Integer, nil] nil selects the largest allowed window
      #   starting at range_start
      # @return array with two elements: [range_start, range_end]
      def self.validate_range(range_start, range_end)
        max_interval = Limits::MAX_QUERY_INTERVAL
        max_range = Limits::MAX_QUERY_RANGE
        # A missing end used to raise when compared with range_start.
        range_end ||= [range_start + max_interval - 1, max_range].min

        if range_start > range_end
          range_start, range_end = range_end, range_start
          Logger.log('range_start was bigger than range_end, swapped values')
        end
        if range_end - range_start > max_interval - 1
          range_end = range_start + max_interval - 1
          Logger.log("range invalid, set to: #{[range_start, range_end]}")
        end
        if range_start < 1
          range_end = range_end - range_start + 1
          range_start = 1
          Logger.log("range_start must be > 0, set to: #{[range_start, range_end]}")
        elsif range_end > max_range
          range_start = max_range - (range_end - range_start)
          range_end = max_range
          Logger.log("range_end was too big, set to: #{[range_start, range_end]}")
        end
        [range_start, range_end]
      end
    end
  end
end
@@ -1,35 +0,0 @@
1
- require 'oauth2'
2
- require 'epo/ops'
3
-
4
module Epo
  module Ops
    # Keeps the access token in an instance variable, i.e. in memory. Subclass
    # this and overwrite #token if the token should be stored somewhere else.
    class TokenStore
      # Returns the stored token; a new one is generated first if there is
      # none yet or if the stored one reports to be expired.
      # @return the current token
      def token
        return @token if @token && !@token.expired?

        @token = generate_token
      end

      # Forgets the stored token, so that the next call to #token generates a
      # new one.
      def reset
        @token = nil
      end

      protected

      # Requests a token via the OAuth2 client credentials strategy, using the
      # consumer key and secret from Epo::Ops.config.
      def generate_token
        options = {
          site: 'https://ops.epo.org/',
          token_url: "/#{Epo::Ops::API_VERSION}/auth/accesstoken",
          raise_errors: false
        }
        credentials = Epo::Ops.config
        oauth_client = OAuth2::Client.new(credentials.consumer_key, credentials.consumer_secret, options)

        oauth_client.client_credentials.get_token
      end
    end
  end
end