epo-ops 0.2.6 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.travis.yml +6 -0
  4. data/README.md +78 -38
  5. data/epo-ops.gemspec +2 -2
  6. data/lib/epo_ops.rb +46 -0
  7. data/lib/epo_ops/client.rb +46 -0
  8. data/lib/epo_ops/error.rb +87 -0
  9. data/lib/epo_ops/factories.rb +9 -0
  10. data/lib/epo_ops/factories/name_and_address_factory.rb +54 -0
  11. data/lib/epo_ops/factories/patent_application_factory.rb +116 -0
  12. data/lib/epo_ops/factories/register_search_result_factory.rb +42 -0
  13. data/lib/epo_ops/ipc_class_hierarchy.rb +146 -0
  14. data/lib/epo_ops/ipc_class_hierarchy_loader.rb +60 -0
  15. data/lib/epo_ops/ipc_class_util.rb +71 -0
  16. data/lib/epo_ops/limits.rb +20 -0
  17. data/lib/epo_ops/logger.rb +15 -0
  18. data/lib/epo_ops/name_and_address.rb +58 -0
  19. data/lib/epo_ops/patent_application.rb +159 -0
  20. data/lib/epo_ops/rate_limit.rb +47 -0
  21. data/lib/epo_ops/register.rb +100 -0
  22. data/lib/epo_ops/register_search_result.rb +40 -0
  23. data/lib/epo_ops/search_query_builder.rb +65 -0
  24. data/lib/epo_ops/token_store.rb +33 -0
  25. data/lib/epo_ops/token_store/redis.rb +45 -0
  26. data/lib/epo_ops/util.rb +52 -0
  27. data/lib/epo_ops/version.rb +3 -0
  28. metadata +26 -20
  29. data/lib/epo/ops.rb +0 -43
  30. data/lib/epo/ops/address.rb +0 -60
  31. data/lib/epo/ops/bibliographic_document.rb +0 -196
  32. data/lib/epo/ops/client.rb +0 -27
  33. data/lib/epo/ops/error.rb +0 -89
  34. data/lib/epo/ops/ipc_class_hierarchy.rb +0 -148
  35. data/lib/epo/ops/ipc_class_hierarchy_loader.rb +0 -62
  36. data/lib/epo/ops/ipc_class_util.rb +0 -73
  37. data/lib/epo/ops/limits.rb +0 -22
  38. data/lib/epo/ops/logger.rb +0 -11
  39. data/lib/epo/ops/rate_limit.rb +0 -49
  40. data/lib/epo/ops/register.rb +0 -152
  41. data/lib/epo/ops/search_query_builder.rb +0 -65
  42. data/lib/epo/ops/token_store.rb +0 -35
  43. data/lib/epo/ops/token_store/redis.rb +0 -47
  44. data/lib/epo/ops/util.rb +0 -32
  45. data/lib/epo/ops/version.rb +0 -6
@@ -1,62 +0,0 @@
1
- require 'httparty'
2
- require 'epo/ops/ipc_class_util'
3
-
4
- module Epo
5
- module Ops
6
- # Usually this should only be used internally.
7
- # Loads the Hierarchy from the WIPO.
8
- # This is used to update IpcClassHierarchy manually.
9
- # At the beginning of the year the WIPO publishes a new list of IPC classes.
10
- # The IpcClassHierarchy should then be updated. Make sure that the url is
11
- # correct!
12
class IpcClassHierarchyLoader
  # Downloads the IPC title lists published by the WIPO and turns them into a
  # parent => children mapping, used to regenerate IpcClassHierarchy by hand.
  #
  # NOTE: every method here is a singleton method (`def self.`). A bare
  # `private` keyword has no effect on those, so the helpers below have always
  # been publicly callable. They stay public for backward compatibility and
  # are tagged `@api private` instead.

  # Loads the hierarchy data from the WIPO.
  # @return [Hash{String => Array<String>}] parent class => direct children
  def self.load
    load_url
  end

  # Fetches one title list per section letter (A-H) and merges the parsed
  # results into a single Hash. Make sure the dated URL below is current.
  # @api private
  # @return [Hash{String => Array<String>}]
  def self.load_url
    url = 'http://www.wipo.int/ipc/itos4ipc/ITSupport_and_download_area/20160101/IPC_scheme_title_list/EN_ipc_section_#letter_title_list_20160101.txt'
    # The proxy settings do not change between requests, so read them once.
    proxy_settings = proxy

    # There is a file for every letter A-H
    ('A'..'H').each_with_object({}) do |letter, hierarchy|
      response = HTTParty.get(url.gsub('#letter', letter),
                              http_proxyaddr: proxy_settings[:addr],
                              http_proxyport: proxy_settings[:port])
      hierarchy.merge!(process_file(response.body))
    end
  end

  # Parses the content of one title list. Each non-blank line holds one class
  # entry; the class in generic format and its description are separated by a
  # tab. Only entries that resolve to 3 characters (e.g. 'A01', filed under
  # 'A') or 4 characters (e.g. 'A01B', filed under 'A01') are kept.
  # @api private
  # @param file [String] raw content of a title list
  # @return [Hash{String => Array<String>}] parent class => direct children
  def self.process_file(file)
    # each_with_object keeps the accumulator across a bare `next`; with
    # `inject` a skipped blank line turned the accumulator into nil.
    file.each_line.each_with_object(Hash.new { |h, k| h[k] = [] }) do |line, hierarchy|
      next if line.to_s.strip.empty?

      ipc_class = Epo::Ops::IpcClassUtil.parse_generic_format(line.split("\t").first)
      case ipc_class.length
      when 3 then hierarchy[ipc_class[0]] << ipc_class
      when 4 then hierarchy[ipc_class[0, 3]] << ipc_class
      end
    end
  end

  # Reads the proxy configuration from the `http_proxy` environment variable.
  # @api private
  # @return [Hash] `{ addr: String or nil, port: String or nil }`; both
  #   values are nil when the variable is unset or blank.
  def self.proxy
    configured = ENV['http_proxy'].to_s.strip
    return { addr: nil, port: nil } if configured.empty?

    proxy_addr, proxy_port = configured.gsub('http://', '').gsub('/', '').split(':')
    { addr: proxy_addr, port: proxy_port }
  end
end
61
- end
62
- end
@@ -1,73 +0,0 @@
1
- require 'epo/ops/ipc_class_hierarchy'
2
-
3
- module Epo
4
- module Ops
5
- # Utility functions to work on Strings representing ipc classes.
6
class IpcClassUtil
  # Utility functions working on Strings that represent ipc classes.

  # An ipc class in invalid format was given, or none at all.
  class InvalidIpcClassError < StandardError; end
  # It is currently not supported to split by the most specific class level.
  # This would result in a large amount of requests.
  class LevelNotSupportedError < StandardError; end

  # @return [Array<String>] \['A', 'B', …, 'H'\] (a fresh array on each call)
  def self.main_classes
    %w(A B C D E F G H)
  end

  # Checks whether the given ipc_class is valid as OPS search parameter.
  # @param [String] ipc_class an ipc class
  # @return [Boolean] false for anything that is not a String (e.g. nil)
  def self.valid_for_search?(ipc_class)
    return false unless ipc_class.is_a?(String)

    !(ipc_class =~ %r{\A[A-H](\d{2}([A-Z](\d{1,2}/\d{2,3})?)?)?\z}).nil?
  end

  # There is a generic format for ipc classes that does not have
  # the / as delimiter and leaves space for additions. This parses
  # it into the format the register search understands. Strings of up to
  # four characters are returned unchanged.
  # @param [String] generic ipc class in generic format
  # @return [String] reformatted ipc class
  # @raise [InvalidIpcClassError] if a string longer than four characters
  #   does not end in the generic layout (subclass, 4 digits, 6 digits)
  # @example
  #   parse_generic_format('A01B0003140000') #=> 'A01B3/14'
  def self.parse_generic_format(generic)
    return generic if generic.length <= 4

    match = generic.match(/([A-Z]\d{2}[A-Z])(\d{4})(\d{6})$/)
    fail InvalidIpcClassError, generic unless match

    # to_i drops the leading zeros of the main group ('0003' -> 3).
    "#{match[1]}#{match[2].to_i}/#{process_number(match[3])}"
  end

  # @param [String] ipc_class an ipc_class
  # @return [Array] List of all ipc classes one level more specific.
  # @example
  #   children('A')   #=> ['A01', 'A21', 'A22', 'A23', ...]
  #   children('A62') #=> ['A62B', 'A62C', 'A62D'],
  # @raise [InvalidIpcClassError] if parameter is not a valid ipc class in
  #   the format EPO understands
  # @raise [LevelNotSupportedError] for parameters with ipc class depth >= 3
  #   e.g. 'A62B' cannot be split further. It is currently not necessary to
  #   do so, it would only blow up the gem, and you do not want to query for
  #   all classes at the lowest level, as it takes too many requests.
  def self.children(ipc_class)
    return main_classes if ipc_class.nil?
    fail InvalidIpcClassError, ipc_class unless valid_for_search?(ipc_class)

    map = IpcClassHierarchy::Hierarchy
    fail LevelNotSupportedError, ipc_class unless map.key? ipc_class
    map[ipc_class]
  end

  # Strips the trailing zeros of the subgroup part and pads the remainder
  # back to at least two digits ('140000' -> '14', '100000' -> '10',
  # '000000' -> '00').
  # NOTE: defined with `def self.`, so it was never actually private; it is
  # kept public for backward compatibility.
  # @api private
  # @param [String] number the six digit subgroup part
  # @return [String]
  def self.process_number(number)
    number.gsub(/0+$/, '').ljust(2, '0')
  end
end
72
- end
73
- end
@@ -1,22 +0,0 @@
1
- module Epo
2
- module Ops
3
- # The register search is limited by some parameters. With one
4
- # query one may only request as many as
5
- # {Epo::Ops::Limits::MAX_QUERY_INTERVAL} references at once.
6
- # Considering this, you have to split your requests by this
7
- # interval. Nevertheless, the maximum value you may use is
8
- # {Epo::Ops::Limits::MAX_QUERY_RANGE}. If you want to retrieve more
9
- # references you must split by other parameters.
10
- # @see Register
11
class Limits
  # Size limits of the register search, kept as plain Integer constants.

  # @return [Integer] The maximum number of elements you may search with one
  #   query. Ignoring this will result in errors.
  MAX_QUERY_INTERVAL = 100

  # @return [Integer] The range in which you can search is limited, say you
  #   cannot request all patents of a given class at once, you probably must
  #   split your requests by additional conditions.
  MAX_QUERY_RANGE = 2_000
end
21
- end
22
- end
@@ -1,11 +0,0 @@
1
- module Epo
2
- module Ops
3
- # Simple logger writing some notifications to standard output.
4
class Logger
  # Writes the given notification to standard output.
  # @param output the object to print (anything `puts` accepts)
  # @return [nil]
  def self.log(output)
    $stdout.puts(output)
  end
end
10
- end
11
- end
@@ -1,49 +0,0 @@
1
- module Epo
2
- module Ops
3
class RateLimit
  # Wraps the rate limit related HTTP response headers of a request.

  # Seconds added to the current time by {#reset_at}, per rejection reason.
  WEEKLY_QUOTA_RESET_TIME = 604_800
  HOURLY_QUOTA_RESET_TIME = 600
  BASE_RESET_TIME = 60

  # @return [Hash] the header Hash handed to the constructor
  attr_reader :attr

  # @param http_header [Hash] response headers, keyed by header name
  # @raise [RuntimeError] if http_header is not a Hash
  def initialize(http_header)
    unless http_header.is_a?(Hash)
      raise "Rate Limit data should be a Hash but is #{http_header.inspect} (#{http_header.class.name})"
    end

    @attr = http_header
  end

  # @return [Boolean] true if the 'x-rejection-reason' header is present
  def limit_reached?
    @attr.key?('x-rejection-reason')
  end

  # @return [Symbol, nil] :weekly_quota, :hourly_quota or :unknown_reason;
  #   nil when the 'x-rejection-reason' header has no value
  def rejection_reason
    reason = @attr['x-rejection-reason']
    return nil unless reason

    known_reasons = {
      'RegisteredQuotaPerWeek' => :weekly_quota,
      'IndividualQuotaPerHour' => :hourly_quota
    }
    known_reasons.fetch(reason, :unknown_reason)
  end

  # @return [Integer, nil] value of 'x-individualquotaperhour-used', if set
  def hourly_quota
    used = @attr['x-individualquotaperhour-used']
    used ? used.to_i : nil
  end

  # @return [Integer, nil] value of 'x-registeredquotaperweek-used', if set
  def weekly_quota
    used = @attr['x-registeredquotaperweek-used']
    used ? used.to_i : nil
  end

  # @return [Integer, nil] current time as seconds since the epoch plus the
  #   reset time matching the rejection reason; nil if no limit was reached
  def reset_at
    return unless limit_reached?

    delays = {
      weekly_quota: WEEKLY_QUOTA_RESET_TIME,
      hourly_quota: HOURLY_QUOTA_RESET_TIME
    }
    delay = delays.fetch(rejection_reason, BASE_RESET_TIME)
    Time.now.to_i + delay
  end
end
48
- end
49
- end
@@ -1,152 +0,0 @@
1
- require 'epo/ops'
2
- require 'epo/ops/client'
3
- require 'epo/ops/util'
4
- require 'epo/ops/bibliographic_document'
5
- require 'epo/ops/logger'
6
- require 'epo/ops/ipc_class_util'
7
-
8
- module Epo
9
- module Ops
10
- # Access to the {http://ops.epo.org/3.2/rest-services/register register}
11
- # endpoint of the EPO OPS API.
12
- #
13
- # By now you can search and retrieve patents by using the type `application`
14
- # in the `epodoc` format.
15
- #
16
- # Search queries are limited by size, not following these limits
17
- # will result in errors. You should probably use {.search} which handles the
18
- # limits itself.
19
- #
20
- # For more fine grained control use {.raw_search} and {.raw_biblio}
21
- #
22
- # @see Limits
23
- # @see SearchQueryBuilder
24
class Register
  # Access to the register endpoint of the EPO OPS API.
  #
  # NOTE: all methods are singleton methods (`def self.`); a bare `private`
  # keyword has no effect on those, so {.parse_search_results} and
  # {.register_api_string} have always been publicly callable. They are kept
  # public for backward compatibility and tagged `@api private`.

  # Identifies a document; `date` is only filled for publication references.
  Reference = Struct.new(:country, :doc_number, :date) do
    # @return [String] country code followed by the document number
    def epodoc_reference
      country + doc_number
    end
  end

  # One parsed entry of a register search result.
  SearchEntry = Struct.new(:publication_reference, :application_reference, :ipc_classes)

  # A helper method which creates queries that take API limits into account.
  # @param ipc_class [String, nil] ipc class to restrict the queries to
  # @param date [Date, nil] publication date to restrict the queries to
  # @param patent_count [Integer] number of overall results expected.
  #   See {.published_patents_counts}
  #
  # @return [Array] of Strings, each a query to put into {Register.raw_search}
  # @see Limits
  def self.split_by_size_limits(ipc_class, date, patent_count)
    max_interval = Limits::MAX_QUERY_INTERVAL
    (1..patent_count).step(max_interval).map do |start|
      # The last window may be shorter than max_interval.
      range_end = [start + max_interval - 1, patent_count].min
      SearchQueryBuilder.build(ipc_class, date, start, range_end)
    end
  end

  # Makes the requests to find how many patents are in each top
  # level ipc class on a given date.
  #
  # @param date [Date] date on which patents should be counted
  # @return [Hash] Hash ipc_class => count (ipc_class A-H)
  def self.patent_counts_per_ipc_class(date)
    # Reuse the single list of top level classes instead of repeating A-H.
    IpcClassUtil.main_classes.each_with_object({}) do |ipc_class, counts|
      counts[ipc_class] = published_patents_counts(ipc_class, date)
    end
  end

  # @param ipc_class [String] up to now should only be between A-H
  # @param date [Date]
  # @return [Integer] number of patents with given parameters
  def self.published_patents_counts(ipc_class = nil, date = nil)
    # The smallest possible window is enough, only the total is read.
    query = SearchQueryBuilder.build(ipc_class, date, 1, 2)
    minimum_result_set = Register.raw_search(query, true)
    return 0 if minimum_result_set.empty?
    minimum_result_set['world_patent_data']['register_search']['total_result_count'].to_i
  end

  # Search method returning all unique register references on a given
  # date, with optional ipc_class.
  # @note This method does more than one query; it may happen that you
  #   exceed your API limits
  # @return [Array] Array of {SearchEntry}
  def self.search(ipc_class = nil, date = nil)
    search_entries = all_queries(ipc_class, date).flat_map { |query| raw_search(query) }
    search_entries.uniq { |entry| entry.application_reference.epodoc_reference }
  end

  # Builds all queries necessary to find all patent references on a given
  # date. If there are more results than {Limits::MAX_QUERY_RANGE}, the
  # queries are built per child class of `ipc_class` instead.
  # @return [Array] Array of Strings containing queries applicable to
  #   {Register.raw_search}.
  def self.all_queries(ipc_class = nil, date = nil)
    count = published_patents_counts(ipc_class, date)
    if count > Limits::MAX_QUERY_RANGE
      IpcClassUtil.children(ipc_class).flat_map { |child| all_queries(child, date) }
    else
      split_by_size_limits(ipc_class, date, count)
    end
  end

  # @param query A query built with {SearchQueryBuilder}
  # @param raw if `true` the result will be the raw response as a nested
  #   hash. if false(default) the result will be parsed further, returning a
  #   list of [SearchEntry]
  # @return [Array, Hash] Array of {SearchEntry}, the raw Hash if `raw` is
  #   set, or an empty Array if the API answered with NotFound
  def self.raw_search(query, raw = false)
    hash = Client.request(:get, register_api_string + 'search?' + query).parsed
    raw ? hash : parse_search_results(hash)
  rescue Epo::Ops::Error::NotFound
    []
  end

  # @param search_entry [SearchEntry] a search entry which should be
  #   retrieved.
  # @return [BibliographicDocument] a parsed document.
  def self.biblio(search_entry)
    raw_biblio(search_entry.application_reference.epodoc_reference)
  end

  # @param reference_id [String] identifier for document. Format similar to
  #   EP1000000
  # @param type [String] may be `application` or `publication` make sure
  #   that the `reference_id` is matching
  # @param format [String] epodoc is a format defined by the EPO for a
  #   document id. see their documentation.
  # @param raw [Boolean] flag if the result should be returned as a raw Hash
  #   or parsed as {BibliographicDocument}
  # @return [BibliographicDocument, Hash]
  def self.raw_biblio(reference_id, type = 'application', format = 'epodoc', raw = false)
    request = "#{register_api_string}#{type}/#{format}/#{reference_id}/biblio"
    result = Client.request(:get, request).parsed
    raw ? result : BibliographicDocument.new(result)
  end

  # Turns the raw search response into {SearchEntry} objects.
  # NOTE(review): assumes every entry carries publication_reference,
  # application_reference and classifications_ipcr - confirm against the API.
  # @api private
  # @param result [Hash] raw response of a search request
  # @return [Array<SearchEntry>]
  def self.parse_search_results(result)
    path = %w(world_patent_data register_search register_documents register_document bibliographic_data)

    Util.find_in_data(result, path).map do |entry|
      publication_id = entry['publication_reference']['document_id']
      application_id = entry['application_reference']['document_id']
      # The classes arrive as one ';;' separated String.
      ipc_classes = entry['classifications_ipcr']['classification_ipcr']['text'].split(';;').map(&:strip)

      SearchEntry.new(
        Reference.new(publication_id['country'], publication_id['doc_number'], publication_id['date']),
        Reference.new(application_id['country'], application_id['doc_number']),
        ipc_classes
      )
    end
  end

  # @api private
  # @return [String] base path of the register endpoint
  def self.register_api_string
    "/#{Epo::Ops::API_VERSION}/rest-services/register/"
  end
end
151
- end
152
- end
@@ -1,65 +0,0 @@
1
- require 'epo/ops/limits'
2
- require 'epo/ops/logger'
3
-
4
- module Epo
5
- module Ops
6
- # This Builder helps creating a search query using
7
- # {https://www.loc.gov/standards/sru/cql/ CQL} (Common Query Language or
8
- # Contextual Query Language) with the identifiers specified by the EPO in
9
- # the OPS Documentation chapter 4.2 ({https://www.epo.org/searching-for-patents/technical/espacenet/ops.html Link})
10
- # - use tab Downloads and see file 'OPS version 3.1 documentation').
11
class SearchQueryBuilder
  # Builds CQL search queries for the register search.
  #
  # NOTE: the helpers below are singleton methods (`def self.`); a bare
  # `private` keyword has no effect on those, so they have always been
  # publicly callable. They stay public for backward compatibility and are
  # tagged `@api private`.

  # Build the query with the given parameters. Invalid ranges are fixed
  # automatically and you will be notified about the changes
  # @param ipc_class [String, nil] ipc class to search for, skipped if nil
  # @param date [#year, #month, #day, nil] publication date, skipped if nil
  # @param range_start [Integer] first result to request
  # @param range_end [Integer, nil] last result to request; when omitted the
  #   largest window allowed by {Limits::MAX_QUERY_INTERVAL} is used
  # @return [String]
  def self.build(ipc_class, date, range_start = 1, range_end = nil)
    first, last = validate_range(range_start, range_end)
    "q=#{build_params(ipc_class, date)}&Range=#{first}-#{last}"
  end

  # Joins the conditions that are present with ' and '.
  # @api private
  # @return [String] empty if neither ipc_class nor date is given
  def self.build_params(ipc_class, date)
    [build_date(date), build_class(ipc_class)].compact.join(' and ')
  end

  # @api private
  # @return [String, nil] publication date condition as `pd=YYYYMMDD`
  def self.build_date(date)
    format('pd=%04d%02d%02d', date.year, date.month, date.day) if date
  end

  # @api private
  # @return [String, nil] ipc class condition as `ic=<class>`
  def self.build_class(ipc_class)
    "ic=#{ipc_class}" if ipc_class
  end

  # Fixes the range given so that it meets the EPO API's rules. The range
  # may only be 100 elements long, the maximum allowed value is 2000.
  # If the given window is out of range, it will be moved preserving the
  # distance covered.
  # @api private
  # @see Limits
  # @param range_start [Integer]
  # @param range_end [Integer, nil] nil selects the largest allowed window
  # @return array with two elements: [range_start, range_end]
  def self.validate_range(range_start, range_end)
    max_interval = Limits::MAX_QUERY_INTERVAL
    max_range = Limits::MAX_QUERY_RANGE
    # `build` defaults range_end to nil; comparing an Integer with nil would
    # raise, so fall back to the largest window starting at range_start.
    range_end ||= range_start + max_interval - 1

    if range_start > range_end
      range_start, range_end = range_end, range_start
      Logger.log('range_start was bigger than range_end, swapped values')
    end
    # Checked independently of the swap above: a swapped range can still be
    # longer than the API allows.
    if range_end - range_start > max_interval - 1
      range_end = range_start + max_interval - 1
      Logger.log("range invalid, set to: #{[range_start, range_end]}")
    end

    if range_start < 1
      range_end = range_end - range_start + 1
      range_start = 1
      Logger.log("range_start must be > 0, set to: #{[range_start, range_end]}")
    elsif range_end > max_range
      range_start = max_range - (range_end - range_start)
      range_end = max_range
      Logger.log("range_end was too big, set to: #{[range_start, range_end]}")
    end
    [range_start, range_end]
  end
end
64
- end
65
- end
@@ -1,35 +0,0 @@
1
- require 'oauth2'
2
- require 'epo/ops'
3
-
4
- module Epo
5
- module Ops
6
- # This class saves the token in memory, you may want to subclass this and
7
- # overwrite #token if you want to store it somewhere else.
8
- #
9
class TokenStore
  # Keeps the OAuth token in an instance variable. Subclass and override
  # #token to store it somewhere else.

  # @return the cached token; a new one is generated first when none is
  #   cached yet or the cached one reports `expired?`
  def token
    @token = generate_token unless @token && !@token.expired?
    @token
  end

  # Drops the cached token, so the next call to #token generates a new one.
  def reset
    @token = nil
  end

  protected

  # Requests a token via the OAuth2 client credentials strategy, using the
  # consumer key and secret from `Epo::Ops.config`.
  def generate_token
    OAuth2::Client.new(
      Epo::Ops.config.consumer_key,
      Epo::Ops.config.consumer_secret,
      site: 'https://ops.epo.org/',
      token_url: "/#{Epo::Ops::API_VERSION}/auth/accesstoken",
      raise_errors: false
    ).client_credentials.get_token
  end
end
34
- end
35
- end