epo-ops 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7fdce426fc2fbb64f5f6c91f76f9892a0d9bcd59
4
+ data.tar.gz: ccc3a3ddd81354d151da39a4b37d01916fede709
5
+ SHA512:
6
+ metadata.gz: ef5f5150ab3eea7bb22675946a1f7c21c4fed46dcba1be39b187f633fe77a756aa261d4bc37a03e59b627005106a00450471da97b7eec5769c0435ed1ca13251
7
+ data.tar.gz: fb9e7f249f6330083d95ab565b2bb2824ea45f220b257fb52741ee76a0dc0165d493388ef2bfb29142e0c1aa4b0e45b76a59d726d96b1120cef49e8b29b6aceb
data/.gitignore ADDED
@@ -0,0 +1,37 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /spec/examples.txt
9
+ /test/tmp/
10
+ /test/version_tmp/
11
+ /test/epo_credentials.yml
12
+ /tmp/
13
+
14
+ ## Specific to RubyMotion:
15
+ .dat*
16
+ .repl_history
17
+ build/
18
+
19
+ ## Documentation cache and generated files:
20
+ /.yardoc/
21
+ /_yardoc/
22
+ /doc/
23
+ /rdoc/
24
+
25
+ ## Environment normalization:
26
+ /.bundle/
27
+ /vendor/bundle
28
+ /lib/bundler/man/
29
+
30
+ # for a library or gem, you might want to ignore these files since the code is
31
+ # intended to run in multiple environments; otherwise, check them in:
32
+ Gemfile.lock
33
+ # .ruby-version
34
+ # .ruby-gemset
35
+
36
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
37
+ .rvmrc
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ language: ruby
2
+ rvm:
3
+ - "2.0.0"
4
+ - "2.1.0"
5
+ - "2.2.0"
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in epo-ops.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 FHG-IMW
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,34 @@
1
+ [![Build Status](https://travis-ci.org/FHG-IMW/epo-ops.svg?branch=master)](https://travis-ci.org/FHG-IMW/epo-ops)
2
+ [![Code Climate](https://codeclimate.com/github/FHG-IMW/epo-ops/badges/gpa.svg)](https://codeclimate.com/github/FHG-IMW/epo-ops)
3
+
4
+ # epo-ops
5
+ Ruby interface to the EPO Open Patent Services (OPS).
6
+
7
+ You can play around with the API [here](https://developers.epo.org/).
8
+ Documentation of it can be found [here](https://www.epo.org/searching-for-patents/technical/espacenet/ops.html) under `Downloads`.
9
+
10
+ # Usage
11
+
12
+ ## Authentication
13
+ In order to use this gem you need to register at the EPO for OAuth
14
+ [here](https://developers.epo.org/user/register).
15
+ Use your credentials by configuring
16
+ ```ruby
17
+ Epo::Ops.configure do |conf|
18
+ conf.consumer_key = "YOUR_KEY"
19
+ conf.consumer_secret = "YOUR_SECRET"
20
+ end
21
+ ```
22
+
23
+ ## What works up to now
24
+ * Search the EPO OPS register with `Epo::Ops::Register.search(query)`; use `Epo::Ops::SearchQueryBuilder` to build an appropriate request.
25
+ * Get bibliographic info from the register, both for application and publication references (which you may retrieve with the search).
26
+ * Bulk searching for all patents on a given date with `Epo::Ops::Register::Bulk.all_queries(date)`. Note that patents are usually published on Wednesdays; if you find some on another weekday, please let us know.
27
+ This method currently returns all queries necessary to find all patents with `Epo::Ops::Register.search`
28
+
29
+ ### #search
30
+ Use the `SearchQueryBuilder` to set up the queries. By default structs are returned that should make it easier to work with the results, but with the `raw`-flag set to true you may also retrieve the resulting hash and parse it yourself.
31
+ The results have the method `#epodoc_reference` which perfectly fits into `#biblio`
32
+
33
+ ### #biblio
34
+ With `Epo::Ops::Register.biblio(reference_id)` you can retrieve the bibliographic entry for the given patent (see OPS documentation). By default it searches the `/application/` endpoint, but you may set `publication` as the second parameter. Make sure the `reference_id` matches the given type. The third optional parameter allows you to set another format for the id, but the default `epodoc` is strongly advised. This format is also provided by search results with `#epodoc_reference`.
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/testtask'
3
+
4
+ Rake::TestTask.new(:test) do |t|
5
+ t.libs << 'test'
6
+ t.libs << 'lib'
7
+ t.test_files = FileList['test/**/*_test.rb']
8
+ end
9
+
10
+ task default: :test
data/epo-ops.gemspec ADDED
@@ -0,0 +1,29 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'epo/ops/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = 'epo-ops'
8
+ spec.version = Epo::Ops::VERSION
9
+ spec.authors = ['Max Kießling', 'Robert Terbach', 'Michael Prilop']
10
+
11
+ spec.summary = 'Ruby interface to the European Patent Office API (OPS)'
12
+ spec.description = 'This gem allows simple access to the European Patent'\
13
+ ' Offices (EPO) Open Patent Services (OPS) using their XML-API'
14
+ spec.homepage = 'https://github.com/FHG-IMW/epo-ops'
15
+
16
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
17
+ spec.bindir = 'exe'
18
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
19
+ spec.require_paths = ['lib']
20
+
21
+ spec.add_development_dependency 'bundler', '~> 1.3'
22
+ spec.add_development_dependency 'rake', '~> 10.5'
23
+ spec.add_development_dependency 'minitest', '~> 5.8'
24
+ spec.add_development_dependency 'vcr', '~> 2.9'
25
+ spec.add_development_dependency 'webmock', '~> 1.22'
26
+ spec.add_development_dependency 'simplecov'
27
+
28
+ spec.add_dependency 'oauth2', '~> 1.1'
29
+ end
data/lib/epo/ops.rb ADDED
@@ -0,0 +1,41 @@
1
+ require 'epo/ops/version'
2
+ require 'epo/ops/token_store'
3
+ require 'epo/ops/register'
4
+ require 'epo/ops/search_query_builder'
5
+
6
module Epo
  module Ops
    # Value holder for the settings of this gem: the OAuth consumer key and
    # secret plus the token store that hands out access tokens.
    class Configuration
      attr_accessor :consumer_key, :consumer_secret, :token_store

      # Starts with empty credentials and a fresh {Epo::Ops::TokenStore}.
      # Also registers an `:xml` parser for `application/xml` bodies with
      # OAuth2::Response; the parser delegates to MultiXml.
      def initialize
        @consumer_key = ''
        @consumer_secret = ''
        @token_store = Epo::Ops::TokenStore.new

        OAuth2::Response.register_parser(:xml, ['application/xml']) { |body| MultiXml.parse(body) }
      end
    end

    class << self
      # Configure your OAuth credentials to use with this gem.
      # @example
      #   Epo::Ops.configure do |conf|
      #     conf.consumer_key = "foo"
      #     conf.consumer_secret = "bar"
      #   end
      # Optional setting:
      #   conf.token_store (defaults to {Epo::Ops::TokenStore})
      # @yieldparam [Configuration] configuration that is yielded.
      def configure
        yield(config)
      end

      # The {Configuration} used; it is created on first access and reused
      # afterwards. You may want to call {Epo::Ops.configure} first.
      # @return [Configuration] the configuration used.
      def config
        @configuration ||= Configuration.new
      end
    end
  end
end
@@ -0,0 +1,60 @@
1
module Epo
  module Ops
    # Represents a person or a company (or both) named in a patent. The same
    # structure is used for agents and for applicants. Most of the time, when
    # `name` is a person name, `address1` is a company name. Be aware that the
    # addresses are in their respective local format.
    #
    # Current patents usually fill at least address1-3, so these should nearly
    # always have values; older ones often only use 1-2. The EPO schema also
    # documents fields like `street` or `city`, but so far they have not been
    # used.
    #
    # @attr [String] name the name of an entity (one or more persons or
    #   companies)
    # @attr [String] address1 first address line. May also be a company name
    # @attr [String] address2 second address line
    # @attr [String] address3 third address line, may be empty
    # @attr [String] address4 fourth address line, may be empty
    # @attr [String] address5 fifth address line, may be empty
    # @attr [String] country_code two letter country code of the address
    # @attr [Date] occurred_on the date an address occurred on, usually
    #   matching the entry's change_gazette_num
    # @attr [String] cdsid some kind of id the EPO provides, not sure yet if
    #   usable as reference.
    class Address
      attr_reader :name,
                  :address1, :address2, :address3, :address4, :address5,
                  :country_code, :occurred_on, :cdsid

      # `name`, `address1` and `address2` are stored as given; every other
      # field falls back to an empty string when it is nil (or false).
      def initialize(name, address1, address2, address3, address4,
                     address5, country_code, occurred_on,
                     cdsid)
        @name = name
        @address1 = address1
        @address2 = address2
        @address3, @address4, @address5 =
          [address3, address4, address5].map { |line| line || '' }
        @country_code, @occurred_on, @cdsid =
          [country_code, occurred_on, cdsid].map { |value| value || '' }
      end

      # Compare addresses by the name and the five address lines only;
      # country, date and cdsid are ignored.
      # @return [Boolean]
      def equal_name_and_address?(other)
        %i(name address1 address2 address3 address4 address5).all? do |field|
          public_send(field) == other.public_send(field)
        end
      end
    end
  end
end
@@ -0,0 +1,196 @@
1
+ require 'epo/ops/address'
2
+
3
module Epo
  module Ops
    # Parses and simplifies the elements the EPO OPS returns for bibliographic
    # documents. Parsing is done lazily: each accessor parses on first use and
    # memoizes the result.
    # Some elements are not yet fully parsed; hashes are returned instead.
    # Not all available information is parsed (e.g. inventors). If you need
    # more fields, add them here.
    class BibliographicDocument
      # @return [Hash] a nested Hash, which is a parsed XML response of the
      #   `/biblio` endpoint of the EPO APIs.
      # @see Client
      attr_reader :raw

      # @param raw [Hash] the parsed `/biblio` response
      def initialize(raw)
        @raw = raw
      end

      # A number by which a patent is uniquely identifiable and queryable.
      # The first two letters are the country code of the processing patent
      # office, for European patents this is EP.
      # @return [String, nil] application number, nil when the response
      #   contains no query element.
      def application_nr
        @application_nr ||= parse_application_nr
      end

      # @return [String] The URL at which you can query the original document.
      def url
        @url ||= "https://ops.epo.org/3.1/rest-services/register/application/epodoc/#{application_nr}"
      end

      # @return [String, nil] the English title of the patent; when no English
      #   title exists the first title listed is used, nil when there is none.
      # @note Titles are usually available at least in English, French and
      #   German. Other languages are also possible.
      def title
        @title ||= parse_title
      end

      # @return [Array] a list of the IPC-Classifications, as strings; empty
      #   when the document carries none.
      #   Format is set by EPO, should be similar to: E06B7/23
      def classifications
        @classifications ||= parse_classification raw
      end

      # Agents and applicants are subject to change at EPO, often
      # their names or addresses are updated, sometimes other
      # people/companies appear or disappear.
      #
      # @return [Array] Array of {Address}
      def agents
        @agents ||= parse_agents raw
      end

      # (see #agents)
      def applicants
        @applicants ||= parse_applicants raw
      end

      # @return [String] the string representation of the current patent
      #   status as described by the EPO
      def status
        @status ||= parse_status raw
      end

      # Many fields of the XML the EPO provides have a field
      # `change_gazette_num`. It is a commercial date (year + week)
      # that describes in which week the element has been
      # changed. This method parses them and returns the most recent
      # date found.
      # @return [Date] the latest date found in the document.
      def latest_update
        @latest ||= parse_latest_update raw
      end

      # The priority date describes the first document that was filed at any
      # patent office in the world regarding this patent.
      # @return [Hash, nil] a hash which describes the filed priority with the
      #   fields: `country` `doc_number`, `date`, `kind`, and `sequence`; nil
      #   when no priority claim is present.
      def priority_date
        @priority_date ||= parse_priority_date raw
      end

      # @return [Array] List of hashes containing information about
      #   publications made, entries exist for multiple types of publications,
      #   e.g. A1, B1.
      def publication_references
        @publication_dates ||= parse_publication_references raw
      end

      # @return [Object, nil] the `date` entry of the first
      #   `request_for_examination` element found under
      #   `dates_rights_effective`, nil when there is none.
      def effective_date
        @effective_date ||= parse_effective_date raw
      end

      private

      # Picks the English title, falling back to the first one listed.
      # Returns nil instead of raising when the document has no title.
      def parse_title
        titles = Util.find_in_data(raw,
                                   path_to_bibliographic_data +
                                     ['invention_title'])
        chosen = titles.find { |the_title| the_title['lang'] == 'en' } ||
                 titles.first
        chosen && chosen['__content__']
      end

      # The query element has the form `<key>=<number>`; everything after the
      # first `=` is the application number.
      def parse_application_nr
        path = %w(world_patent_data register_search query __content__)
        query = Util.find_in_data(raw, path).first
        query && query.partition('=').last
      end

      # `priority_claim` is either a single Hash or a list of them; in the
      # latter case the first entry is used.
      def parse_priority_date(raw)
        priority_claims = Util.find_in_data(raw,
                                            path_to_bibliographic_data +
                                              %w(priority_claims))
                              .first
        return nil if priority_claims.nil?
        claim = priority_claims['priority_claim']
        claim.is_a?(Array) ? claim.first : claim
      end

      def parse_publication_references(raw)
        Util.parse_hash_flat(
          Util.find_in_data(raw,
                            path_to_bibliographic_data +
                              %w(publication_reference)), 'document_id')
      end

      def parse_effective_date(raw)
        request =
          Util.find_in_data(raw,
                            path_to_bibliographic_data +
                              %w(dates_rights_effective request_for_examination))
              .first
        request.nil? ? nil : request['date']
      end

      # Collects every `change_gazette_num` in the document, drops the ones
      # that could not be parsed and returns the most recent.
      def parse_latest_update(raw)
        gazette_nums = Util.parse_hash_flat(raw, 'change_gazette_num')
        gazette_nums.map { |num| Util.parse_change_gazette_num(num) }
                    .compact
                    .max
      end

      def parse_status(raw)
        Util.find_in_data(raw,
                          path_to_bibliographic_data + ['status'])
            .first
      end

      # The classification text is a comma separated list. Returns an empty
      # list instead of raising when the element is missing.
      def parse_classification(raw)
        text = Util.find_in_data(raw,
                                 path_to_bibliographic_data +
                                   %w(classifications_ipcr classification_ipcr text))
                   .first
        text ? text.split(',').map(&:strip) : []
      end

      def parse_agents(raw)
        entries = Util.find_in_data(raw,
                                    path_to_bibliographic_data +
                                      %w(parties agents))
        parse_address(entries, 'agent')
      end

      def parse_applicants(raw)
        entries = Util.find_in_data(raw,
                                    path_to_bibliographic_data +
                                      %w(parties applicants))
        parse_address(entries, 'applicant')
      end

      # Builds one {Address} per addressbook entry of every party group entry.
      # The address date is the entry's own `change_gazette_num`; when that is
      # absent or unparsable the document's latest update is used instead.
      def parse_address(party_group_entries, group)
        party_group_entries.flat_map do |entry|
          change_date = Util.parse_change_gazette_num(
            entry.fetch('change_gazette_num', '')) || latest_update
          Util.find_in_data(entry, [group, 'addressbook']).map do |address|
            lines = address['address']
            Address.new(address['name'],
                        lines['address_1'],
                        lines['address_2'],
                        lines['address_3'],
                        lines['address_4'],
                        lines['address_5'],
                        lines['country'],
                        change_date,
                        address['cdsid'])
          end
        end
      end

      def path_to_bibliographic_data
        %w(world_patent_data register_search register_documents register_document bibliographic_data)
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ require 'epo/ops/token_store'
2
+ require 'epo/ops/error'
3
+
4
require 'uri'

module Epo
  module Ops
    # Thin wrapper that sends authenticated requests to the EPO OPS API using
    # the access token provided by the configured token store.
    class Client
      # `URI.encode` was removed in Ruby 3.0. The RFC 2396 parser offers the
      # same escaping; on Rubies that predate `URI::RFC2396_Parser` the
      # default parser is used instead.
      URL_ESCAPER = if defined?(URI::RFC2396_Parser)
                      URI::RFC2396_Parser.new
                    else
                      URI::DEFAULT_PARSER
                    end

      # Performs a request with the current access token.
      #
      # @param verb [Symbol] HTTP verb handed to the token, e.g. `:get`
      # @param url [String] request path including the query; characters that
      #   are not allowed in a URL (such as spaces) are percent-escaped
      # @param options [Hash] passed through to the token's `request`
      # @return [OAuth2::Response]
      # @raise [Error] built with {Error.from_response} for every response
      #   whose status is not 200
      def self.request(verb, url, options = {})
        token = Epo::Ops.config.token_store.token
        response = token.request(verb, URL_ESCAPER.escape(url), options)
        fail Error.from_response(response) unless response.status == 200
        response
      end
    end
  end
end
@@ -0,0 +1,85 @@
1
+ require 'epo/ops/rate_limit'
2
+
3
module Epo
  module Ops
    # Base class of all errors raised for unsuccessful EPO responses. Each
    # error carries the HTTP status code and the rate limit information taken
    # from the response headers.
    class Error < StandardError
      # @return [Integer, nil] the HTTP status code of the response
      attr_reader :code
      # @return [RateLimit] rate limit data built from the response headers
      attr_reader :rate_limit

      # Raised when EPO returns a 4xx HTTP status code
      ClientError = Class.new(self)
      # Raised when EPO returns the HTTP status code 400
      BadRequest = Class.new(ClientError)
      # Raised when EPO returns the HTTP status code 401
      Unauthorized = Class.new(ClientError)
      # Raised when EPO returns the HTTP status code 403
      Forbidden = Class.new(ClientError)
      # Raised when EPO returns the HTTP status code 404
      NotFound = Class.new(ClientError)
      # Raised when EPO returns the HTTP status code 406
      NotAcceptable = Class.new(ClientError)
      # Raised when EPO returns the HTTP status code 422
      UnprocessableEntity = Class.new(ClientError)
      # Raised when EPO returns the HTTP status code 429
      TooManyRequests = Class.new(ClientError)
      # Raised when EPO returns a 5xx HTTP status code
      ServerError = Class.new(self)
      # Raised when EPO returns the HTTP status code 500
      InternalServerError = Class.new(ServerError)
      # Raised when EPO returns the HTTP status code 502
      BadGateway = Class.new(ServerError)
      # Raised when EPO returns the HTTP status code 503
      ServiceUnavailable = Class.new(ServerError)
      # Raised when EPO returns the HTTP status code 504
      GatewayTimeout = Class.new(ServerError)

      # Status codes that have a dedicated error class.
      ERRORS = {
        400 => Epo::Ops::Error::BadRequest,
        401 => Epo::Ops::Error::Unauthorized,
        403 => Epo::Ops::Error::Forbidden,
        404 => Epo::Ops::Error::NotFound,
        406 => Epo::Ops::Error::NotAcceptable,
        422 => Epo::Ops::Error::UnprocessableEntity,
        429 => Epo::Ops::Error::TooManyRequests,
        500 => Epo::Ops::Error::InternalServerError,
        502 => Epo::Ops::Error::BadGateway,
        503 => Epo::Ops::Error::ServiceUnavailable,
        504 => Epo::Ops::Error::GatewayTimeout
      }.freeze

      # 403 responses whose message identifies a more specific error.
      FORBIDDEN_MESSAGES = {
        'This request has been rejected due to the violation of Fair Use policy' => Epo::Ops::Error::TooManyRequests
      }.freeze

      class << self
        # Builds the error matching the given response.
        #
        # Status codes without a dedicated class no longer crash: other 4xx
        # codes become {ClientError}, other 5xx codes {ServerError} and
        # anything else a plain {Error}.
        #
        # @param response [#status, #parsed, #headers]
        # @return [Error]
        def from_response(response)
          code = response.status
          message = parse_error(response.parsed)
          error_class(code, message).new(message, response.headers, code)
        end

        private

        # Picks the error class for a status code and (for 403) the message.
        def error_class(code, message)
          special = FORBIDDEN_MESSAGES[message] if code == 403
          return special if special

          ERRORS.fetch(code) do
            case code
            when 400..499 then ClientError
            when 500..599 then ServerError
            else Error
            end
          end
        end

        # Extracts `error -> message` from a parsed body; nil when the body
        # is empty or does not have that shape (e.g. an unparsed String).
        def parse_error(body)
          return nil unless body.is_a?(Hash)
          error = body['error']
          error['message'] if error.is_a?(Hash)
        end
      end

      # @param message [String, nil] error message
      # @param rate_limit [Hash] response headers, wrapped in a {RateLimit}
      # @param code [Integer, nil] HTTP status code
      def initialize(message = '', rate_limit = {}, code = nil)
        super(message)
        @code = code
        @rate_limit = RateLimit.new(rate_limit)
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module Epo
  module Ops
    # Size limits of the register search. A single query may request at most
    # {Epo::Ops::Limits::MAX_QUERY_INTERVAL} references, so larger result
    # sets have to be fetched in windows of that size. The highest position
    # a window may reach is {Epo::Ops::Limits::MAX_QUERY_RANGE}; to retrieve
    # more references than that, the search must be split by other
    # parameters.
    # @see Register
    class Limits
      # @return [Integer] The maximum number of elements you may search with
      #   one query. Ignoring this will result in errors.
      MAX_QUERY_INTERVAL = 100

      # @return [Integer] The range in which you can search is limited: you
      #   cannot, say, request all patents of a given class at once, but
      #   probably must split your requests by additional conditions.
      MAX_QUERY_RANGE = 2_000
    end
  end
end
@@ -0,0 +1,11 @@
1
module Epo
  module Ops
    # Minimal logger that prints notifications on standard output.
    class Logger
      class << self
        # Writes `output` to standard output, exactly as `puts` would.
        def log(output)
          $stdout.puts(output)
        end
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
module Epo
  module Ops
    # Read-only view on the rate limit related HTTP headers of a response.
    class RateLimit
      # Seconds added to the current time by {#reset_at}, by rejection reason.
      WEEKLY_QUOTA_RESET_TIME = 604_800
      HOURLY_QUOTA_RESET_TIME = 600
      BASE_RESET_TIME = 60

      # @return [Hash] the headers this object was built from
      attr_reader :attr

      # @param http_header [Hash] response headers
      # @raise [RuntimeError] when `http_header` is not a Hash
      def initialize(http_header)
        unless http_header.is_a?(Hash)
          fail "Rate Limit data should be a Hash but is #{http_header.inspect} (#{http_header.class.name})"
        end
        @attr = http_header
      end

      # @return [Boolean] whether the headers contain `x-rejection-reason`
      def limit_reached?
        @attr.key?('x-rejection-reason')
      end

      # @return [Symbol, nil] `:weekly_quota`, `:hourly_quota` or
      #   `:unknown_reason`; nil when no rejection reason is set
      def rejection_reason
        reason = @attr['x-rejection-reason']
        return nil unless reason

        if reason == 'RegisteredQuotaPerWeek'
          :weekly_quota
        elsif reason == 'IndividualQuotaPerHour'
          :hourly_quota
        else
          :unknown_reason
        end
      end

      # @return [Integer, nil] value of `x-individualquotaperhour-used`
      def hourly_quota
        used = @attr['x-individualquotaperhour-used']
        used ? used.to_i : nil
      end

      # @return [Integer, nil] value of `x-registeredquotaperweek-used`
      def weekly_quota
        used = @attr['x-registeredquotaperweek-used']
        used ? used.to_i : nil
      end

      # @return [Integer, nil] epoch seconds: the current time plus the reset
      #   time matching the rejection reason; nil when no limit was reached
      def reset_at
        return unless limit_reached?

        wait = case rejection_reason
               when :weekly_quota then WEEKLY_QUOTA_RESET_TIME
               when :hourly_quota then HOURLY_QUOTA_RESET_TIME
               else BASE_RESET_TIME
               end
        Time.now.to_i + wait
      end
    end
  end
end
@@ -0,0 +1,177 @@
1
+ require 'epo/ops'
2
+ require 'epo/ops/client'
3
+ require 'epo/ops/util'
4
+ require 'epo/ops/bibliographic_document'
5
+ require 'epo/ops/logger'
6
+
7
module Epo
  module Ops
    # Access to the {http://ops.epo.org/3.1/rest-services/register register}
    # endpoint of the EPO OPS API.
    #
    # By now you can search and retrieve patents by using the type
    # `application` in the `epodoc` format.
    #
    # Search queries are limited by size, not following these limits
    # will result in errors.
    #
    # @see Limits
    # @see SearchQueryBuilder
    class Register
      # Helper class that assists in building the queries necessary to search
      # for more patents than possible with one query respecting the given
      # limits.
      #
      # @see Limits
      class Bulk
        # Helper method returning all unique register references on a given
        # date. This is the same as executing all queries from {.all_queries}
        # and making the results unique.
        #
        # @param date [#year, #month, #day] publication date to search for
        # @return [Array] application references, unique by `doc_number`;
        #   empty when the EPO answers with "not found"
        # @note Patents may have more than one IPC class, they would appear
        #   more than once, this method filters these by `doc_number`
        def self.all_register_references(date)
          search_entries = all_queries(date).flat_map do |query|
            Register.search(query)
          end
          search_entries.map(&:application_reference)
                        .uniq(&:doc_number)
        rescue ::Epo::Ops::Error::NotFound
          []
        end

        # Build the queries to search for all patents on a given date.
        #
        # The offset of EPOs register search may at max be 2000, if more
        # patents are published on one day the queries must be split; here
        # across the first level of ipc classification.
        # At time of this writing they are mostly below 1000, there should be
        # plenty of space for now.
        #
        # In case the limits change, they can be found in {Epo::Ops::Limits}
        # Should there be more than 2000 patents in one class, a message will
        # be logged, please file an Issue if that happens.
        #
        # @return [Array] containing all queries to put into
        #   {Register.search}.
        # @note The queries are split by IPC-classes if necessary; Patents may
        #   have more than one, you might get multiple references to the same
        #   patent.
        # @see .all_register_references
        def self.all_queries(date)
          overall_count = published_patents_count(date)
          if overall_count > Limits::MAX_QUERY_RANGE
            patent_count_by_ipc_classes(date).flat_map do |ipc_class, count|
              builder = SearchQueryBuilder.new
                                          .publication_date(date.year, date.month, date.day)
                                          .and
                                          .ipc_class(ipc_class)
              split_by_size_limits(builder, count)
            end
          else
            builder = SearchQueryBuilder.new
                                        .publication_date(date.year, date.month, date.day)
            split_by_size_limits(builder, overall_count)
          end
        end

        # Counts the patents of the given date per top level IPC class and
        # logs a message for every class that exceeds the query range.
        #
        # @return [Hash] For all top level IPC classes (A-H) => count
        def self.patent_count_by_ipc_classes(date)
          %w(A B C D E F G H).each_with_object({}) do |ipcc, counts|
            counts[ipcc] = published_patents_count(date, ipcc)
            next unless counts[ipcc] > Limits::MAX_QUERY_RANGE
            Logger.log("IPC class #{ipcc} has more than #{Epo::Ops::Limits::MAX_QUERY_RANGE} on #{date}. They can not all be retrieved. Please file this as an issue!")
          end
        end

        # Splits the queries built by `query_builder` by the allowed
        # intervals.
        #
        # @param query_builder [SearchQueryBuilder] with all settings made,
        #   but not built yet.
        # @param patent_count [Integer] number of overall results expected.
        #   See {.published_patents_count}
        #
        # @return [Array] of Strings, each a query to put into
        #   {Register.search}; empty when `patent_count` is below 1
        def self.split_by_size_limits(query_builder, patent_count)
          max_interval = Limits::MAX_QUERY_INTERVAL
          (1..patent_count).step(max_interval).map do |start|
            query_builder.build(start, [start + max_interval - 1, patent_count].min)
          end
        end

        # Makes a minimum request to find out how many patents are published
        # on that date, optionally restricted to one IPC class.
        #
        # @return [Integer] number of patents on that date.
        def self.published_patents_count(date, ipc_class = nil)
          query = SearchQueryBuilder.new
          query.publication_date(date.year, date.month, date.day)
          query.and.ipc_class(ipc_class) if ipc_class
          minimum_result_set = Register.search(query.build(1, 2), true)
          return 0 if minimum_result_set.nil? || minimum_result_set.empty?
          minimum_result_set['world_patent_data']['register_search']['total_result_count'].to_i
        end
      end

      # A document reference as found in search results.
      Reference = Struct.new(:country, :doc_number, :date) do
        # @return [String] country followed by the document number
        def epodoc_reference
          country + doc_number
        end
      end

      # One search hit: its publication and application {Reference} plus the
      # list of IPC classes.
      SearchEntry = Struct.new(:publication_reference, :application_reference, :ipc_classes)

      # @param query A query built with {Epo::Ops::SearchQueryBuilder}
      # @param raw if `true` the result will be the raw response as a nested
      #   hash. If false (default) the result will be parsed further,
      #   returning a list of {SearchEntry}
      # @return [Array, Hash] list of {SearchEntry}, or the raw hash
      def self.search(query, raw = false)
        hash = Client.request(:get, register_api_string + query).parsed
        raw ? hash : parse_search_results(hash)
      end

      # @param reference_id the id of the document, matching `type` and
      #   `format`
      # @param type may be `application` or `publication` make sure that the
      #   `reference_id` is matching
      # @param format epodoc is a format defined by the EPO for a
      #   document id. see their documentation.
      # @param raw flag if the result should be returned as a raw Hash or
      #   parsed as {BibliographicDocument}
      # @return [BibliographicDocument, Hash]
      def self.biblio(reference_id, type = 'application', format = 'epodoc', raw = false)
        request = "#{register_api_string}#{type}/#{format}/#{reference_id}/biblio"
        result = Client.request(:get, request).parsed
        raw ? result : BibliographicDocument.new(result)
      end

      # NOTE: the two helpers below were preceded by a bare `private`, which
      # has no effect on `def self.` methods. They are therefore still
      # publicly callable; the keyword is dropped so the code says what it
      # does. Treat them as internal.

      # Turns a raw search response into a list of {SearchEntry}. Entries
      # without an IPC classification get an empty `ipc_classes` list.
      # @api private
      def self.parse_search_results(result)
        path = %w(world_patent_data register_search register_documents register_document bibliographic_data)

        Util.find_in_data(result, path).map do |entry|
          publication = entry['publication_reference']['document_id']
          application = entry['application_reference']['document_id']
          classification = entry['classifications_ipcr']
          classification &&= classification['classification_ipcr']
          text = classification && classification['text']

          SearchEntry.new(
            Reference.new(publication['country'], publication['doc_number'], publication['date']),
            Reference.new(application['country'], application['doc_number']),
            text ? text.split(';;').map(&:strip) : [])
        end
      end

      # Path prefix of all register requests.
      # @api private
      def self.register_api_string
        '/3.1/rest-services/register/'
      end
    end
  end
end
@@ -0,0 +1,70 @@
1
+ require 'epo/ops/limits'
2
+ require 'epo/ops/logger'
3
+
4
module Epo
  module Ops
    # This Builder helps creating a search query using
    # {https://www.loc.gov/standards/sru/cql/ CQL} (Common Query Language or
    # Contextual Query Language) with the identifiers specified by the EPO in
    # the OPS Documentation chapter 4.2 ( {https://www.epo.org/searching-for-patents/technical/espacenet/ops.html Link}
    # - use tab Downloads and see file 'OPS version 3.1 documentation').
    #
    # Every condition method appends to the same internal query, so do not
    # reuse a builder for a different search.
    class SearchQueryBuilder
      def initialize
        # The query is appended to in place, so it must be a private mutable
        # copy rather than the literal itself.
        @query = 'search?q='.dup
      end

      # Appends a publication date condition (`pd=YYYYMMDD`).
      # @return [SearchQueryBuilder] self, for chaining
      def publication_date(year, month, day)
        @query << format('pd=%04d%02d%02d', year, month, day)
        self
      end

      # Appends the ` and ` operator between two conditions.
      # @return [SearchQueryBuilder] self, for chaining
      def and
        @query << ' and '
        self
      end

      # Appends an IPC class condition (`ic=...`).
      # @return [SearchQueryBuilder] self, for chaining
      def ipc_class(ipc_class)
        @query << "ic=#{ipc_class}"
        # TODO: format ipc_class properly
        self
      end

      # Builds the search query ready to put into the register API. The
      # parameters are validated with {#validate_range}.
      # This does not change the query, several calls will allow you to
      # create the same queries for different ranges.
      #
      # @param range_start [Integer] first result position (1-based)
      # @param range_end [Integer, nil] last result position; defaults to
      #   the largest window allowed after `range_start`
      # @return [String] the query including the `Range` parameter
      def build(range_start = 1, range_end = nil)
        range_end ||= range_start + Limits::MAX_QUERY_INTERVAL - 1
        first, last = validate_range(range_start, range_end)
        @query + "&Range=#{first}-#{last}"
      end

      # Fixes the range given so that it meets the EPO APIs rules. The range
      # may only be 100 elements long, the maximum allowed value is 2000.
      # If the given window is out of range, it will be moved preserving the
      # distance covered. Every correction is logged.
      #
      # Reversed bounds are swapped first and then checked like any other
      # range; previously a swapped range skipped the length check, so e.g.
      # (300, 1) came back as an oversized 1-300.
      # @see Epo::Ops::Limits
      # @return array with two elements: [range_start, range_end]
      def validate_range(range_start, range_end)
        if range_start > range_end
          range_start, range_end = range_end, range_start
          Logger.log('range_start was bigger than range_end, swapped values')
        end

        max_distance = Limits::MAX_QUERY_INTERVAL - 1
        if range_start == range_end || range_end - range_start > max_distance
          range_end = range_start + max_distance
          Logger.log("range invalid, set to: #{[range_start, range_end]}")
        end

        if range_start < 1
          range_end = range_end - range_start + 1
          range_start = 1
          Logger.log("range_start must be > 0, set to: #{[range_start, range_end]}")
        elsif range_end > Limits::MAX_QUERY_RANGE
          range_start = Limits::MAX_QUERY_RANGE - (range_end - range_start)
          range_end = Limits::MAX_QUERY_RANGE
          Logger.log("range_end was too big, set to: #{[range_start, range_end]}")
        end
        [range_start, range_end]
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'oauth2'
2
+ require 'epo/ops'
3
+
4
module Epo
  module Ops
    # Holds the OAuth access token in memory. Subclass it and override
    # #token if the token should be kept somewhere else.
    #
    class TokenStore
      # @return the cached token, or a newly requested one when nothing is
      #   cached yet or the cached token reports itself as expired
      def token
        reusable = @token && !@token.expired?
        reusable ? @token : generate_token
      end

      protected

      # Requests a token with the OAuth2 client-credentials strategy, using
      # the consumer key and secret from Epo::Ops.config, and caches it in
      # @token.
      #
      # @return the token that was just obtained
      def generate_token
        oauth_client = OAuth2::Client.new(
          Epo::Ops.config.consumer_key,
          Epo::Ops.config.consumer_secret,
          site: 'https://ops.epo.org/',
          token_url: '/3.1/auth/accesstoken',
          raise_errors: false
        )
        credentials_strategy = oauth_client.client_credentials
        @token = credentials_strategy.get_token
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
+ require 'redis'
2
+ require 'connection_pool'
3
+
4
module Epo
  module Ops
    class TokenStore
      # Token store that keeps the access token string in Redis (through a
      # connection pool) instead of in this object's memory.
      class Redis < TokenStore
        # @param redis_host host name handed to ::Redis.new(host: ...)
        # @raise [RuntimeError] if the redis or connection_pool gem is not loaded
        def initialize(redis_host)
          raise "Please install gems 'redis' and 'connection_pool' to use this feature" unless defined?(::Redis) && defined?(ConnectionPool)

          @redis = ConnectionPool.new(size: 5, timeout: 5) { ::Redis.new(host: redis_host) }
        end

        # Returns an access token built from the string stored in Redis, or
        # requests (and stores) a new one when Redis holds no usable value.
        #
        # @return [OAuth2::AccessToken]
        def token
          # ConnectionPool checks a connection out with #with; it returns
          # the value of the block.
          cached = @redis.with { |conn| conn.get("epo_token_#{id}") }
          return generate_token if cached.nil? || cached.empty?

          OAuth2::AccessToken.new(client, cached)
        end

        private

        # Requests a new token through the parent implementation and stores
        # its token string in Redis, expiring together with the token.
        #
        # @return the token returned by TokenStore#generate_token
        def generate_token
          # Use the value returned by super; calling #token here would look
          # in Redis again and re-enter this method.
          fresh = super
          value = fresh && fresh.token
          # raise_errors is off on the OAuth client, so a failed request may
          # yield a token without a value; never cache that.
          unless value.nil? || value.empty?
            @redis.with do |conn|
              conn.set("epo_token_#{id}", value, ex: fresh.expires_in, nx: true)
            end
          end
          fresh
        end

        # Suffix of the Redis key under which the token is stored.
        # NOTE(review): the original referenced an undefined `id`; the
        # configured consumer key is used so that every set of credentials
        # gets its own entry. Confirm this is the intended identifier.
        def id
          Epo::Ops.config.consumer_key
        end

        # OAuth2 client needed to rebuild an AccessToken from the stored
        # string. Mirrors the settings used in TokenStore#generate_token.
        def client
          @client ||= OAuth2::Client.new(
            Epo::Ops.config.consumer_key,
            Epo::Ops.config.consumer_secret,
            site: 'https://ops.epo.org/',
            token_url: '/3.1/auth/accesstoken',
            raise_errors: false
          )
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
require 'date'

module Epo
  module Ops
    # Helpers for extracting values from nested response data.
    class Util
      # Follows +path+ through +epo_hash+, collecting every match on the way.
      #
      # @param epo_hash nested structure of hashes, arrays and strings
      # @param path the path should be an array of strings indicating the
      #   path you want to go in the hash
      # @return [Array] flat list of everything found at the end of the path
      def self.find_in_data(epo_hash, path)
        path.reduce(epo_hash) { |res, c| parse_hash_flat(res, c) }
      end

      # Searches +hash_layer+ for +target+. A layer that holds a truthy value
      # under +target+ contributes that value; otherwise its values are
      # searched recursively. Arrays are searched element by element.
      #
      # @param hash_layer hash-like object, array, string or nil
      # @param target key to look for
      # @return [Array] flat list of all values found, empty if none
      def self.parse_hash_flat(hash_layer, target)
        case hash_layer
        when nil, String
          # Leaves: nothing to descend into. Matching with `when` also covers
          # String subclasses, whose #[] would otherwise do substring lookup.
          []
        when Array
          hash_layer.flat_map { |element| parse_hash_flat(element, target) }
        else
          # Numbers, booleans, symbols and the like are leaves too; indexing
          # them with the target would raise or give a meaningless result.
          return [] unless hash_layer.respond_to?(:to_h)

          found = hash_layer[target]
          return [found].flatten if found

          hash_layer.to_h.flat_map { |_key, value| parse_hash_flat(value, target) }
        end
      end

      # Converts a gazette number of the form "YYYY/WW" into a date.
      #
      # @param num [String, nil] e.g. "2016/05"
      # @return [Date, nil] the date Date.commercial yields for that
      #   commercial year and week, or nil if +num+ does not have the
      #   expected form
      # @raise [ArgumentError] if the week does not exist in that year
      def self.parse_change_gazette_num(num)
        # \A and \z anchor the whole string; ^ and $ would also accept a
        # matching line inside multi-line input.
        res = %r{\A(?<year>\d{4})/(?<week>\d{2})\z}.match(num)
        return nil if res.nil?

        # Explicit base 10 so that weeks such as "08" are not read as octal.
        Date.commercial(Integer(res[:year], 10), Integer(res[:week], 10))
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module Epo
  module Ops
    # Version string of this gem; frozen so it cannot be modified at runtime.
    VERSION = '0.1.5'.freeze
  end
end
metadata ADDED
@@ -0,0 +1,164 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: epo-ops
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.5
5
+ platform: ruby
6
+ authors:
7
+ - Max Kießling
8
+ - Robert Terbach
9
+ - Michael Prilop
10
+ autorequire:
11
+ bindir: exe
12
+ cert_chain: []
13
+ date: 2016-03-01 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: bundler
17
+ requirement: !ruby/object:Gem::Requirement
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.3'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ requirements:
26
+ - - ~>
27
+ - !ruby/object:Gem::Version
28
+ version: '1.3'
29
+ - !ruby/object:Gem::Dependency
30
+ name: rake
31
+ requirement: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - ~>
34
+ - !ruby/object:Gem::Version
35
+ version: '10.5'
36
+ type: :development
37
+ prerelease: false
38
+ version_requirements: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ~>
41
+ - !ruby/object:Gem::Version
42
+ version: '10.5'
43
+ - !ruby/object:Gem::Dependency
44
+ name: minitest
45
+ requirement: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ~>
48
+ - !ruby/object:Gem::Version
49
+ version: '5.8'
50
+ type: :development
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ~>
55
+ - !ruby/object:Gem::Version
56
+ version: '5.8'
57
+ - !ruby/object:Gem::Dependency
58
+ name: vcr
59
+ requirement: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ~>
62
+ - !ruby/object:Gem::Version
63
+ version: '2.9'
64
+ type: :development
65
+ prerelease: false
66
+ version_requirements: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ~>
69
+ - !ruby/object:Gem::Version
70
+ version: '2.9'
71
+ - !ruby/object:Gem::Dependency
72
+ name: webmock
73
+ requirement: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: '1.22'
78
+ type: :development
79
+ prerelease: false
80
+ version_requirements: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ~>
83
+ - !ruby/object:Gem::Version
84
+ version: '1.22'
85
+ - !ruby/object:Gem::Dependency
86
+ name: simplecov
87
+ requirement: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ type: :development
93
+ prerelease: false
94
+ version_requirements: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ - !ruby/object:Gem::Dependency
100
+ name: oauth2
101
+ requirement: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - ~>
104
+ - !ruby/object:Gem::Version
105
+ version: '1.1'
106
+ type: :runtime
107
+ prerelease: false
108
+ version_requirements: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ~>
111
+ - !ruby/object:Gem::Version
112
+ version: '1.1'
113
+ description: This gem allows simple access to the European Patent Offices (EPO) Open
114
+ Patent Services (OPS) using their XML-API
115
+ email:
116
+ executables: []
117
+ extensions: []
118
+ extra_rdoc_files: []
119
+ files:
120
+ - .gitignore
121
+ - .travis.yml
122
+ - Gemfile
123
+ - LICENSE
124
+ - README.md
125
+ - Rakefile
126
+ - epo-ops.gemspec
127
+ - lib/epo/ops.rb
128
+ - lib/epo/ops/address.rb
129
+ - lib/epo/ops/bibliographic_document.rb
130
+ - lib/epo/ops/client.rb
131
+ - lib/epo/ops/error.rb
132
+ - lib/epo/ops/limits.rb
133
+ - lib/epo/ops/logger.rb
134
+ - lib/epo/ops/rate_limit.rb
135
+ - lib/epo/ops/register.rb
136
+ - lib/epo/ops/search_query_builder.rb
137
+ - lib/epo/ops/token_store.rb
138
+ - lib/epo/ops/token_store/redis.rb
139
+ - lib/epo/ops/util.rb
140
+ - lib/epo/ops/version.rb
141
+ homepage: https://github.com/FHG-IMW/epo-ops
142
+ licenses: []
143
+ metadata: {}
144
+ post_install_message:
145
+ rdoc_options: []
146
+ require_paths:
147
+ - lib
148
+ required_ruby_version: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - '>='
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ required_rubygems_version: !ruby/object:Gem::Requirement
154
+ requirements:
155
+ - - '>='
156
+ - !ruby/object:Gem::Version
157
+ version: '0'
158
+ requirements: []
159
+ rubyforge_project:
160
+ rubygems_version: 2.4.6
161
+ signing_key:
162
+ specification_version: 4
163
+ summary: Ruby interface to the European Patent Office API (OPS)
164
+ test_files: []