epo-ops 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7fdce426fc2fbb64f5f6c91f76f9892a0d9bcd59
4
+ data.tar.gz: ccc3a3ddd81354d151da39a4b37d01916fede709
5
+ SHA512:
6
+ metadata.gz: ef5f5150ab3eea7bb22675946a1f7c21c4fed46dcba1be39b187f633fe77a756aa261d4bc37a03e59b627005106a00450471da97b7eec5769c0435ed1ca13251
7
+ data.tar.gz: fb9e7f249f6330083d95ab565b2bb2824ea45f220b257fb52741ee76a0dc0165d493388ef2bfb29142e0c1aa4b0e45b76a59d726d96b1120cef49e8b29b6aceb
data/.gitignore ADDED
@@ -0,0 +1,37 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /spec/examples.txt
9
+ /test/tmp/
10
+ /test/version_tmp/
11
+ /test/epo_credentials.yml
12
+ /tmp/
13
+
14
+ ## Specific to RubyMotion:
15
+ .dat*
16
+ .repl_history
17
+ build/
18
+
19
+ ## Documentation cache and generated files:
20
+ /.yardoc/
21
+ /_yardoc/
22
+ /doc/
23
+ /rdoc/
24
+
25
+ ## Environment normalization:
26
+ /.bundle/
27
+ /vendor/bundle
28
+ /lib/bundler/man/
29
+
30
+ # for a library or gem, you might want to ignore these files since the code is
31
+ # intended to run in multiple environments; otherwise, check them in:
32
+ Gemfile.lock
33
+ # .ruby-version
34
+ # .ruby-gemset
35
+
36
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
37
+ .rvmrc
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ language: ruby
2
+ rvm:
3
+ - "2.0.0"
4
+ - "2.1.0"
5
+ - "2.2.0"
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in epo-ops.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 FHG-IMW
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,34 @@
1
+ [![Build Status](https://travis-ci.org/FHG-IMW/epo-ops.svg?branch=master)](https://travis-ci.org/FHG-IMW/epo-ops)
2
+ [![Code Climate](https://codeclimate.com/github/FHG-IMW/epo-ops/badges/gpa.svg)](https://codeclimate.com/github/FHG-IMW/epo-ops)
3
+
4
+ # epo-ops
5
+ Ruby interface to the EPO Open Patent Services (OPS).
6
+
7
+ You can play around with the API [here](https://developers.epo.org/).
8
+ Documentation of it can be found [here](https://www.epo.org/searching-for-patents/technical/espacenet/ops.html) under `Downloads`.
9
+
10
+ # Usage
11
+
12
+ ## Authentication
13
+ In order to use this gem you need to register at the EPO for OAuth
14
+ [here](https://developers.epo.org/user/register).
15
+ Use your credentials by configuring
16
+ ```ruby
17
+ Epo::Ops.configure do |conf|
18
+ conf.consumer_key = "YOUR_KEY"
19
+ conf.consumer_secret = "YOUR_SECRET"
20
+ end
21
+ ```
22
+
23
+ ## What works up to now
24
+ * Search the EPO OPS register with `Epo::Ops::Register.search(query)`; use `Epo::Ops::SearchQueryBuilder` to build an appropriate request.
25
+ * Get bibliographic info from the register, both for application and publication references (which you may retrieve with the search).
26
+ * Bulk searching for all patents on a given date with `Epo::Ops::Register::Bulk.all_queries(date)`. Note that patents are usually published on Wednesdays; if you find some on another weekday, please let us know.
27
+ This method currently returns all queries necessary to find all patents with `Epo::Ops::Register.search`
28
+
29
+ ### #search
30
+ Use the `SearchQueryBuilder` to set up the queries. By default structs are returned that should make it easier to work with the results, but with the `raw`-flag set to true you may also retrieve the resulting hash and parse it yourself.
31
+ The results have the method `#epodoc_reference` which perfectly fits into `#biblio`
32
+
33
+ ### #biblio
34
+ With `Epo::Ops::Register.biblio(reference_id)` you can retrieve the bibliographic entry for the given patent (see OPS documentation). By default it searches the `/application/` endpoint, but you may set `publication` as the second parameter. Make sure the `reference_id` matches the given type. The last optional parameter allows you to set another format for the id, but the default `epodoc` is strongly advised. This format is also provided by search results via `#epodoc_reference`.
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/testtask'
3
+
4
+ Rake::TestTask.new(:test) do |t|
5
+ t.libs << 'test'
6
+ t.libs << 'lib'
7
+ t.test_files = FileList['test/**/*_test.rb']
8
+ end
9
+
10
+ task default: :test
data/epo-ops.gemspec ADDED
@@ -0,0 +1,29 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'epo/ops/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = 'epo-ops'
8
+ spec.version = Epo::Ops::VERSION
9
+ spec.authors = ['Max Kießling', 'Robert Terbach', 'Michael Prilop']
10
+
11
+ spec.summary = 'Ruby interface to the European Patent Office API (OPS)'
12
+ spec.description = 'This gem allows simple access to the European Patent'\
13
+ ' Offices (EPO) Open Patent Services (OPS) using their XML-API'
14
+ spec.homepage = 'https://github.com/FHG-IMW/epo-ops'
15
+
16
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
17
+ spec.bindir = 'exe'
18
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
19
+ spec.require_paths = ['lib']
20
+
21
+ spec.add_development_dependency 'bundler', '~> 1.3'
22
+ spec.add_development_dependency 'rake', '~> 10.5'
23
+ spec.add_development_dependency 'minitest', '~> 5.8'
24
+ spec.add_development_dependency 'vcr', '~> 2.9'
25
+ spec.add_development_dependency 'webmock', '~> 1.22'
26
+ spec.add_development_dependency 'simplecov'
27
+
28
+ spec.add_dependency 'oauth2', '~> 1.1'
29
+ end
data/lib/epo/ops.rb ADDED
@@ -0,0 +1,41 @@
1
+ require 'epo/ops/version'
2
+ require 'epo/ops/token_store'
3
+ require 'epo/ops/register'
4
+ require 'epo/ops/search_query_builder'
5
+
6
module Epo
  module Ops
    class << self
      # Yields the shared {Configuration} so the OAuth credentials can be set.
      #
      # @example
      #   Epo::Ops.configure do |conf|
      #     conf.consumer_key = "foo"
      #     conf.consumer_secret = "bar"
      #   end
      #
      # Optionally `conf.token_store` may be replaced as well; it defaults to
      # an instance of {Epo::Ops::TokenStore}.
      #
      # @yieldparam [Configuration] configuration the object returned by
      #   {Epo::Ops.config}.
      def configure
        yield(config)
      end

      # The {Configuration} in use. It is created on first access and reused
      # afterwards; call {Epo::Ops.configure} to fill in credentials.
      #
      # @return [Configuration] the configuration used.
      def config
        @configuration ||= Configuration.new
      end
    end

    # Holds the OAuth consumer key/secret and the token store.
    class Configuration
      attr_accessor :consumer_key, :consumer_secret, :token_store

      # Starts with empty credentials and a fresh {TokenStore}, and registers
      # an XML parser with OAuth2 for `application/xml` responses.
      def initialize
        @consumer_key = ''
        @consumer_secret = ''
        @token_store = TokenStore.new
        register_xml_parser
      end

      private

      # Makes OAuth2 hand `application/xml` bodies to MultiXml.
      def register_xml_parser
        OAuth2::Response.register_parser(:xml, ['application/xml']) do |body|
          MultiXml.parse(body)
        end
      end
    end
  end
end
@@ -0,0 +1,60 @@
1
module Epo
  module Ops
    # A person or company (or both) named in a patent; used for agents as well
    # as applicants. Frequently `name` is a person and `address1` a company
    # name. Addresses are kept in their respective local format.
    #
    # Recent patents usually fill address1-3, older ones often only 1-2.
    # EPO's schema also documents fields such as `street` or `city`, but they
    # have not been seen in use so far.
    #
    # @attr [String] name the name of an entity (one or more persons or
    #   companies)
    # @attr [String] address1 first address line. May also be a company name
    # @attr [String] address2 second address line
    # @attr [String] address3 third address line, may be empty
    # @attr [String] address4 fourth address line, may be empty
    # @attr [String] address5 fifth address line, may be empty
    # @attr [String] country_code two letter country code of the address
    # @attr [Date] occurred_on the date an address occurred on, usually
    #   matching the entry's change_gazette_num (an empty String when no
    #   date was given)
    # @attr [String] cdsid some kind of id the EPO provides; unclear whether
    #   it is usable as a reference.
    class Address
      attr_reader :name, :address1, :address2, :address3, :address4,
                  :address5, :country_code, :occurred_on, :cdsid

      # `name`, `address1` and `address2` are stored as given; every other
      # argument falls back to an empty String when it is nil.
      def initialize(name, address1, address2, address3, address4,
                     address5, country_code, occurred_on,
                     cdsid)
        @name = name
        @address1 = address1
        @address2 = address2
        @address3, @address4, @address5 =
          [address3, address4, address5].map { |line| line || '' }
        @country_code, @occurred_on, @cdsid =
          [country_code, occurred_on, cdsid].map { |value| value || '' }
      end

      # Compare addresses by the name and the five address lines only;
      # country, date and cdsid are ignored.
      # @return [Boolean]
      def equal_name_and_address?(other)
        [:name, :address1, :address2, :address3, :address4, :address5].all? do |field|
          public_send(field) == other.public_send(field)
        end
      end
    end
  end
end
@@ -0,0 +1,196 @@
1
+ require 'epo/ops/address'
2
+
3
module Epo
  module Ops
    # Parses and simplifies the elements the EPO OPS returns for bibliographic
    # documents. Parsing is done lazily and memoised per field.
    # Some elements are not yet fully parsed; hashes are returned instead.
    # Not all available information is parsed (e.g. inventors); if you need
    # more fields, add them here.
    #
    # Elements missing from the response yield `nil` (or an empty Array for
    # list-valued fields) instead of raising.
    class BibliographicDocument
      # @return [Hash] a nested Hash, which is a parsed XML response of the
      #   `/biblio` endpoint of the EPO APIs.
      # @see Client
      attr_reader :raw

      # @param raw [Hash] the parsed `/biblio` response.
      def initialize(raw)
        @raw = raw
      end

      # A number by which a patent is uniquely identifiable and queryable.
      # The first two letters are the country code of the processing patent
      # office; for European patents this is EP.
      # @return [String, nil] application number, nil when the response
      #   carries no query element.
      def application_nr
        @application_nr ||= parse_application_nr
      end

      # @return [String] The URL at which you can query the original document.
      def url
        @url ||= "https://ops.epo.org/3.1/rest-services/register/application/epodoc/#{application_nr}"
      end

      # @return [String, nil] the English title of the patent, or the first
      #   title listed when no English one exists; nil without any title.
      # @note Titles are usually available at least in English, French and
      #   German. Other languages are also possible.
      def title
        @title ||= parse_title
      end

      # @return [Array] a list of the IPC classifications, as strings.
      #   Format is set by EPO, should be similar to: E06B7/23.
      #   Empty when the document has no classification text.
      def classifications
        @classifications ||= parse_classification raw
      end

      # Agents and applicants are subject to change at EPO; often
      # their names or addresses are updated, sometimes other
      # people/companies appear or disappear.
      #
      # @return [Array] Array of {Address}
      def agents
        @agents ||= parse_agents raw
      end

      # (see #agents)
      def applicants
        @applicants ||= parse_applicants raw
      end

      # @return [String] the string representation of the current patent
      #   status as described by the EPO
      def status
        @status ||= parse_status raw
      end

      # Many fields of the XML the EPO provides have a field
      # `change_gazette_num`. It is a commercial date (year + week)
      # that describes in which week the element has been
      # changed. This method parses them and returns the most recent
      # date found.
      # @return [Date] the latest date found in the document.
      def latest_update
        @latest ||= parse_latest_update raw
      end

      # The priority date describes the first document that was filed at any
      # patent office in the world regarding this patent.
      # @return [Hash, nil] a hash which describes the filed priority with
      #   the fields `country`, `doc_number`, `date`, `kind`, and `sequence`;
      #   nil when no priority claim is present.
      def priority_date
        @priority_date ||= parse_priority_date raw
      end

      # @return [Array] List of hashes containing information about
      #   publications made; entries exist for multiple types of
      #   publications, e.g. A1, B1.
      def publication_references
        @publication_references ||= parse_publication_references raw
      end

      # @return [Object, nil] the `date` entry of the first
      #   `dates_rights_effective/request_for_examination` element, nil when
      #   that element is absent.
      def effective_date
        @effective_date ||= parse_effective_date raw
      end

      private

      # Picks the English title, falling back to the first one listed.
      def parse_title
        titles = Util.find_in_data(raw,
                                   path_to_bibliographic_data +
                                     ['invention_title'])
        chosen = titles.find { |the_title| the_title['lang'] == 'en' } || titles.first
        chosen && chosen['__content__']
      end

      # The query element has the form `<type>=<number>`; only the part after
      # the first `=` is returned.
      def parse_application_nr
        path = %w(world_patent_data register_search query __content__)
        query = Util.find_in_data(raw, path).first
        query && query.partition('=').last
      end

      # `priority_claim` is either a single Hash or a list of them; in the
      # latter case the first one is used.
      def parse_priority_date(raw)
        priority_claims = Util.find_in_data(raw,
                                            path_to_bibliographic_data +
                                              %w(priority_claims))
                              .first
        return nil if priority_claims.nil?
        claim = priority_claims['priority_claim']
        claim.is_a?(Hash) ? claim : claim.first
      end

      def parse_publication_references(raw)
        Util.parse_hash_flat(
          Util.find_in_data(raw,
                            path_to_bibliographic_data +
                              %w(publication_reference)), 'document_id')
      end

      def parse_effective_date(raw)
        request = Util.find_in_data(raw,
                                    path_to_bibliographic_data +
                                      %w(dates_rights_effective request_for_examination))
                      .first
        request.nil? ? nil : request['date']
      end

      # Collects every `change_gazette_num` in the document and returns the
      # greatest one that could be parsed.
      def parse_latest_update(raw)
        gazette_nums = Util.parse_hash_flat(raw, 'change_gazette_num')
        gazette_nums.map { |num| Util.parse_change_gazette_num(num) }.compact.max
      end

      def parse_status(raw)
        Util.find_in_data(raw,
                          path_to_bibliographic_data + ['status'])
            .first
      end

      # The classification text is a comma separated list.
      def parse_classification(raw)
        text = Util.find_in_data(raw,
                                 path_to_bibliographic_data +
                                   %w(classifications_ipcr classification_ipcr text))
                   .first
        text ? text.split(',').map(&:strip) : []
      end

      def parse_agents(raw)
        entries = Util.find_in_data(raw,
                                    path_to_bibliographic_data +
                                      %w(parties agents))
        parse_address(entries, 'agent')
      end

      def parse_applicants(raw)
        entries = Util.find_in_data(raw,
                                    path_to_bibliographic_data +
                                      %w(parties applicants))
        parse_address(entries, 'applicant')
      end

      # Builds an {Address} for every addressbook entry of the given party
      # group. Entries without a parsable `change_gazette_num` are dated with
      # {#latest_update}.
      def parse_address(party_group_entries, group)
        party_group_entries.flat_map do |entry|
          change_date = Util.parse_change_gazette_num(
            entry.fetch('change_gazette_num', '')) || latest_update
          Util.find_in_data(entry, [group, 'addressbook']).map do |address|
            lines = address['address']
            Address.new(address['name'],
                        lines['address_1'],
                        lines['address_2'],
                        lines['address_3'],
                        lines['address_4'],
                        lines['address_5'],
                        lines['country'],
                        change_date,
                        address['cdsid'])
          end
        end
      end

      def path_to_bibliographic_data
        %w(world_patent_data register_search register_documents register_document bibliographic_data)
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ require 'epo/ops/token_store'
2
+ require 'epo/ops/error'
3
+
4
module Epo
  module Ops
    # Thin wrapper that sends authenticated requests to the EPO OPS API.
    class Client
      # Performs a request with the token from the configured token store.
      #
      # The URL is percent-escaped with `URI::DEFAULT_PARSER.escape`, which is
      # what `URI.encode` delegated to before it was removed in Ruby 3.0.
      #
      # @param verb [Symbol] HTTP verb handed to the token's `request`.
      # @param url [String] path or URL; may contain unescaped characters
      #   such as spaces.
      # @param options [Hash] passed through to the token's `request`.
      # @return [OAuth2::Response]
      # @raise [Error] built by {Error.from_response} for every response
      #   whose status is not 200.
      def self.request(verb, url, options = {})
        token = Epo::Ops.config.token_store.token
        response = token.request(verb, URI::DEFAULT_PARSER.escape(url), options)
        fail Error.from_response(response) unless response.status == 200
        response
      end
    end
  end
end
@@ -0,0 +1,85 @@
1
+ require 'epo/ops/rate_limit'
2
+
3
module Epo
  module Ops
    # Base class for all errors raised for non-successful EPO responses.
    class Error < StandardError
      # @return [Integer, nil] the HTTP status code of the response.
      attr_reader :code
      # @return [RateLimit] wraps the headers of the failed response.
      attr_reader :rate_limit

      # Raised when EPO returns a 4xx HTTP status code
      ClientError = Class.new(self)
      # Raised when EPO returns the HTTP status code 400
      BadRequest = Class.new(ClientError)
      # Raised when EPO returns the HTTP status code 401
      Unauthorized = Class.new(ClientError)
      # Raised when EPO returns the HTTP status code 403
      Forbidden = Class.new(ClientError)
      # Raised when EPO returns the HTTP status code 404
      NotFound = Class.new(ClientError)
      # Raised when EPO returns the HTTP status code 406
      NotAcceptable = Class.new(ClientError)
      # Raised when EPO returns the HTTP status code 422
      UnprocessableEntity = Class.new(ClientError)
      # Raised when EPO returns the HTTP status code 429
      TooManyRequests = Class.new(ClientError)
      # Raised when EPO returns a 5xx HTTP status code
      ServerError = Class.new(self)
      # Raised when EPO returns the HTTP status code 500
      InternalServerError = Class.new(ServerError)
      # Raised when EPO returns the HTTP status code 502
      BadGateway = Class.new(ServerError)
      # Raised when EPO returns the HTTP status code 503
      ServiceUnavailable = Class.new(ServerError)
      # Raised when EPO returns the HTTP status code 504
      GatewayTimeout = Class.new(ServerError)

      # Status code => dedicated error class.
      ERRORS = {
        400 => Epo::Ops::Error::BadRequest,
        401 => Epo::Ops::Error::Unauthorized,
        403 => Epo::Ops::Error::Forbidden,
        404 => Epo::Ops::Error::NotFound,
        406 => Epo::Ops::Error::NotAcceptable,
        422 => Epo::Ops::Error::UnprocessableEntity,
        429 => Epo::Ops::Error::TooManyRequests,
        500 => Epo::Ops::Error::InternalServerError,
        502 => Epo::Ops::Error::BadGateway,
        503 => Epo::Ops::Error::ServiceUnavailable,
        504 => Epo::Ops::Error::GatewayTimeout
      }.freeze

      # 403 responses carrying one of these messages are reported as the
      # mapped class instead of {Forbidden}.
      FORBIDDEN_MESSAGES = {
        'This request has been rejected due to the violation of Fair Use policy' => Epo::Ops::Error::TooManyRequests
      }.freeze

      class << self
        # Builds the error matching the given response.
        #
        # Status codes without a dedicated class fall back to {ClientError}
        # (4xx), {ServerError} (5xx) or plain {Error}, so an unexpected
        # status never ends in a NoMethodError on nil.
        #
        # @param response [#status, #parsed, #headers]
        # @return [Error]
        def from_response(response)
          code = response.status
          message = parse_error(response.parsed)
          error_class_for(code, message).new(message, response.headers, code)
        end

        private

        # Selects the error class for a status code and parsed message.
        def error_class_for(code, message)
          return FORBIDDEN_MESSAGES[message] if code == 403 && FORBIDDEN_MESSAGES[message]
          ERRORS.fetch(code) do
            case code
            when 400..499 then ClientError
            when 500..599 then ServerError
            else Epo::Ops::Error
            end
          end
        end

        # Extracts `body['error']['message']`; nil when the body is not a
        # Hash or has no such entry.
        def parse_error(body)
          return nil unless body.is_a?(Hash)
          error = body['error']
          error['message'] if error.is_a?(Hash)
        end
      end

      # @param message [String, nil] error text from the response body.
      # @param rate_limit [Hash] response headers, wrapped in {RateLimit}.
      # @param code [Integer, nil] HTTP status code.
      def initialize(message = '', rate_limit = {}, code = nil)
        super(message)
        @code = code
        @rate_limit = RateLimit.new(rate_limit)
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module Epo
  module Ops
    # Size limits of the register search.
    #
    # A single query may request at most
    # {Epo::Ops::Limits::MAX_QUERY_INTERVAL} references, so larger result
    # sets have to be fetched in slices of that size. The highest position a
    # slice may reach is {Epo::Ops::Limits::MAX_QUERY_RANGE}; to retrieve
    # anything beyond that, the search itself must be narrowed by further
    # parameters.
    # @see Register
    class Limits
      # @return [Integer] The maximum number of elements one query may ask
      #   for. Ignoring this will result in errors.
      MAX_QUERY_INTERVAL = 100

      # @return [Integer] The upper bound of the searchable range. A result
      #   set larger than this (for instance all patents of one class) cannot
      #   be paged through completely and must be split by additional
      #   conditions.
      MAX_QUERY_RANGE = 2000
    end
  end
end
@@ -0,0 +1,11 @@
1
module Epo
  module Ops
    # Minimal logger that prints notifications to standard output.
    class Logger
      class << self
        # Passes `output` straight on to `puts`.
        def log(output)
          puts(output)
        end
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
module Epo
  module Ops
    # Read access to the quota related headers of an EPO response.
    class RateLimit
      # Seconds added to the current time by {#reset_at}, per rejection
      # reason.
      WEEKLY_QUOTA_RESET_TIME = 604_800
      HOURLY_QUOTA_RESET_TIME = 600
      BASE_RESET_TIME = 60

      # @return [Hash] the headers this object was built from.
      attr_reader :attr

      # @param http_header [Hash] response headers.
      # @raise [RuntimeError] when `http_header` is not a Hash.
      def initialize(http_header)
        unless http_header.is_a?(Hash)
          fail "Rate Limit data should be a Hash but is #{http_header.inspect} (#{http_header.class.name})"
        end
        @attr = http_header
      end

      # @return [Boolean] true when an `x-rejection-reason` header exists.
      def limit_reached?
        @attr.key?('x-rejection-reason')
      end

      # @return [Symbol, nil] `:weekly_quota`, `:hourly_quota` or
      #   `:unknown_reason`; nil when the header has no value.
      def rejection_reason
        reason = @attr['x-rejection-reason']
        return nil unless reason
        case reason
        when 'RegisteredQuotaPerWeek' then :weekly_quota
        when 'IndividualQuotaPerHour' then :hourly_quota
        else :unknown_reason
        end
      end

      # @return [Integer, nil] value of `x-individualquotaperhour-used`.
      def hourly_quota
        used = @attr['x-individualquotaperhour-used']
        used ? used.to_i : nil
      end

      # @return [Integer, nil] value of `x-registeredquotaperweek-used`.
      def weekly_quota
        used = @attr['x-registeredquotaperweek-used']
        used ? used.to_i : nil
      end

      # @return [Integer, nil] epoch seconds: now plus the reset time that
      #   belongs to the rejection reason; nil when no limit was reached.
      def reset_at
        return unless limit_reached?

        delay = case rejection_reason
                when :weekly_quota then WEEKLY_QUOTA_RESET_TIME
                when :hourly_quota then HOURLY_QUOTA_RESET_TIME
                else BASE_RESET_TIME
                end
        Time.now.to_i + delay
      end
    end
  end
end
@@ -0,0 +1,177 @@
1
+ require 'epo/ops'
2
+ require 'epo/ops/client'
3
+ require 'epo/ops/util'
4
+ require 'epo/ops/bibliographic_document'
5
+ require 'epo/ops/logger'
6
+
7
module Epo
  module Ops
    # Access to the {http://ops.epo.org/3.1/rest-services/register register}
    # endpoint of the EPO OPS API.
    #
    # Currently patents can be searched for and retrieved using the type
    # `application` in the `epodoc` format.
    #
    # Search queries are limited in size; exceeding the limits results in
    # errors.
    #
    # @see Limits
    # @see SearchQueryBuilder
    class Register
      # Helpers that build the set of queries needed to find more patents
      # than a single query may return under the given limits.
      #
      # @see Limits
      class Bulk
        # All unique register references on a given date: runs every query of
        # {.all_queries} and removes duplicates. An empty Array is returned
        # when the API answers with {Epo::Ops::Error::NotFound}.
        #
        # @note Patents with more than one IPC class would appear more than
        #   once; they are made unique by `doc_number`.
        def self.all_register_references(date)
          begin
            found = Bulk.all_queries(date).flat_map do |query|
              Register.search(query)
            end
          rescue ::Epo::Ops::Error::NotFound
            return []
          end
          found.map(&:application_reference).uniq(&:doc_number)
        end

        # Build the queries to search for all patents on a given date.
        #
        # The register search only reaches up to position 2000. When more
        # patents were published on that day, the queries are split across
        # the first level of the IPC classification. At the time of writing
        # daily counts were mostly below 1000.
        #
        # The limits live in {Epo::Ops::Limits}. If a single class exceeds
        # them, a message is logged; please file an issue in that case.
        #
        # @return [Array] containing all queries to put into {Register.search}.
        # @note When split by IPC class the same patent may be referenced by
        #   several queries, because patents can have more than one class.
        # @see .all_register_references
        def self.all_queries(date)
          total = published_patents_count(date)
          unless total > Limits::MAX_QUERY_RANGE
            builder = SearchQueryBuilder.new
                                        .publication_date(date.year, date.month, date.day)
            return split_by_size_limits(builder, total)
          end

          patent_count_by_ipc_classes(date).flat_map do |ipc_class, count|
            builder = SearchQueryBuilder.new
                                        .publication_date(date.year, date.month, date.day)
                                        .and
                                        .ipc_class(ipc_class)
            split_by_size_limits(builder, count)
          end
        end

        # @return [Hash] For all top level IPC classes (A-H) => count
        def self.patent_count_by_ipc_classes(date)
          %w(A B C D E F G H).each_with_object({}) do |ipcc, counts|
            counts[ipcc] = published_patents_count(date, ipcc)
            next unless counts[ipcc] > Limits::MAX_QUERY_RANGE
            Logger.log("IPC class #{ipcc} has more than #{Epo::Ops::Limits::MAX_QUERY_RANGE} on #{date}. They can not all be retrieved. Please file this as an issue!")
          end
        end

        # Slices the query described by `query_builder` into the allowed
        # intervals.
        #
        # @param query_builder [SearchQueryBuilder] with all settings made, but
        #   not built yet.
        # @param patent_count [Integer] number of overall results expected.
        #   See {.published_patents_count}
        #
        # @return [Array] of Strings, each a query to put into {Register.search}
        def self.split_by_size_limits(query_builder, patent_count)
          slice = Limits::MAX_QUERY_INTERVAL
          1.step(patent_count, slice).map do |first|
            last = [first + slice - 1, patent_count].min
            query_builder.build(first, last)
          end
        end

        # Issues a minimal search to learn how many patents were published on
        # that date (optionally restricted to one IPC class).
        #
        # @return [Integer] number of patents on that date.
        def self.published_patents_count(date, ipc_class = nil)
          builder = SearchQueryBuilder.new
          builder.publication_date(date.year, date.month, date.day)
          builder.and.ipc_class(ipc_class) if ipc_class
          response = Register.search(builder.build(1, 2), true)
          return 0 if response.empty?
          response['world_patent_data']['register_search']['total_result_count'].to_i
        end
      end

      # @param query A query built with {Epo::Ops::SearchQueryBuilder}
      # @param raw if `true` the raw response is returned as a nested hash;
      #   if false (default) it is parsed into a list of {SearchEntry}
      # @return [Array, Hash] {SearchEntry} list, or the raw Hash
      def self.search(query, raw = false)
        response = Client.request(:get, register_api_string + query).parsed
        raw ? response : parse_search_results(response)
      end

      # @param reference_id the document id, in the given `format`
      # @param type may be `application` or `publication`; make sure the
      #   `reference_id` matches
      # @param format epodoc is a format defined by the EPO for a
      #   document id, see their documentation.
      # @param raw flag if the result should be returned as a raw Hash or
      #   parsed as {BibliographicDocument}
      # @return [BibliographicDocument, Hash]
      def self.biblio(reference_id, type = 'application', format = 'epodoc', raw = false)
        path = "#{register_api_string}#{type}/#{format}/#{reference_id}/biblio"
        parsed = Client.request(:get, path).parsed
        return parsed if raw
        BibliographicDocument.new(parsed)
      end

      # A document reference; `date` is only set for publications.
      Reference = Struct.new(:country, :doc_number, :date) do
        # @return [String] country code followed by the document number.
        def epodoc_reference
          country + doc_number
        end
      end

      # One hit of a register search.
      SearchEntry = Struct.new(:publication_reference, :application_reference, :ipc_classes)

      # NOTE: a bare `private` does not apply to methods defined with
      # `def self.`, so the two helpers below remain publicly callable.
      private

      # Turns a raw search response into {SearchEntry} structs. The IPC
      # classes are split on `;;`.
      def self.parse_search_results(result)
        path = %w(world_patent_data register_search register_documents register_document bibliographic_data)

        Util.find_in_data(result, path).map do |entry|
          publication = entry['publication_reference']['document_id']
          application = entry['application_reference']['document_id']
          classes = entry['classifications_ipcr']['classification_ipcr']['text']
          SearchEntry.new(
            Reference.new(publication['country'], publication['doc_number'], publication['date']),
            Reference.new(application['country'], application['doc_number']),
            classes.split(';;').map(&:strip)
          )
        end
      end

      # Path prefix of the register endpoint.
      def self.register_api_string
        '/3.1/rest-services/register/'
      end
    end
  end
end
@@ -0,0 +1,70 @@
1
+ require 'epo/ops/limits'
2
+ require 'epo/ops/logger'
3
+
4
module Epo
  module Ops
    # This Builder helps creating a search query using
    # {https://www.loc.gov/standards/sru/cql/ CQL} (Common Query Language or
    # Contextual Query Language) with the identifiers specified by the EPO in
    # the OPS Documentation chapter 4.2 ( {https://www.epo.org/searching-for-patents/technical/espacenet/ops.html Link}
    # - use tab Downloads and see file 'OPS version 3.1 documentation').
    #
    # The condition methods append to one internal query string, so a builder
    # should not be reused for a different search.
    class SearchQueryBuilder
      def initialize
        # The buffer is appended to below; `dup` yields a mutable String even
        # when string literals are frozen.
        @query = 'search?q='.dup
      end

      # Appends a publication date condition (`pd=YYYYMMDD`).
      # @return [SearchQueryBuilder] self, for chaining.
      def publication_date(year, month, day)
        @query << format('pd=%04d%02d%02d', year, month, day)
        self
      end

      # Appends the ` and ` operator.
      # @return [SearchQueryBuilder] self, for chaining.
      def and
        @query << ' and '
        self
      end

      # Appends an IPC class condition (`ic=...`). The value is inserted as
      # given.
      # @return [SearchQueryBuilder] self, for chaining.
      def ipc_class(ipc_class)
        @query << "ic=#{ipc_class}"
        # TODO: format ipc_class properly
        self
      end

      # Builds the search query ready to put into the register API. The
      # parameters are validated with {#validate_range}.
      # This does not change the query; several calls allow you to create the
      # same query for different ranges.
      #
      # @param range_start [Integer] first result position (1-based).
      # @param range_end [Integer, nil] last result position; defaults to a
      #   full interval starting at `range_start`.
      # @return [String]
      def build(range_start = 1, range_end = nil)
        range_end ||= range_start + Limits::MAX_QUERY_INTERVAL - 1
        first, last = validate_range(range_start, range_end)
        @query + "&Range=#{first}-#{last}"
      end

      # Fixes the given range so that it meets the EPO API's rules: a range
      # may span at most {Limits::MAX_QUERY_INTERVAL} elements and must lie
      # within 1..{Limits::MAX_QUERY_RANGE}. Every adjustment is logged.
      #
      # 1. Reversed bounds are swapped.
      # 2. A range that is too long, or whose bounds are equal, is replaced
      #    by a full interval beginning at `range_start`. This now also
      #    applies to bounds that had to be swapped first.
      # 3. A window reaching outside 1..MAX_QUERY_RANGE is moved back inside,
      #    preserving the distance covered.
      #
      # @see Epo::Ops::Limits
      # @return array with two elements: [range_start, range_end]
      def validate_range(range_start, range_end)
        if range_start > range_end
          range_start, range_end = range_end, range_start
          Logger.log('range_start was bigger than range_end, swapped values')
        end
        if range_start == range_end || range_end - range_start > Limits::MAX_QUERY_INTERVAL - 1
          range_end = range_start + Limits::MAX_QUERY_INTERVAL - 1
          Logger.log("range invalid, set to: #{[range_start, range_end]}")
        end
        if range_start < 1
          range_end = range_end - range_start + 1
          range_start = 1
          Logger.log("range_start must be > 0, set to: #{[range_start, range_end]}")
        elsif range_end > Limits::MAX_QUERY_RANGE
          range_start = Limits::MAX_QUERY_RANGE - (range_end - range_start)
          range_end = Limits::MAX_QUERY_RANGE
          Logger.log("range_end was too big, set to: #{[range_start, range_end]}")
        end
        [range_start, range_end]
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'oauth2'
2
+ require 'epo/ops'
3
+
4
module Epo
  module Ops
    # Keeps the access token in memory. Subclass it and override #token if
    # the token should be stored somewhere else.
    class TokenStore
      # @return the remembered token, or a newly requested one when no token
      #   has been stored yet or the stored one reports itself as expired
      def token
        return @token if @token && !@token.expired?
        generate_token
      end

      protected

      # Requests a new token with the configured consumer key and secret via
      # the OAuth2 client's client_credentials strategy and remembers it.
      # @return the newly obtained token
      def generate_token
        config = Epo::Ops.config
        oauth_client = OAuth2::Client.new(
          config.consumer_key,
          config.consumer_secret,
          site: 'https://ops.epo.org/',
          token_url: '/3.1/auth/accesstoken',
          raise_errors: false
        )
        @token = oauth_client.client_credentials.get_token
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
+ require 'redis'
2
+ require 'connection_pool'
3
+
4
module Epo
  module Ops
    class TokenStore
      # Token store that keeps the access token string in Redis, using a
      # connection pool, instead of holding it in memory only.
      class Redis < TokenStore
        # @param redis_host [String] host handed to ::Redis.new for every
        #   pooled connection
        # @raise [RuntimeError] if the redis or connection_pool gem is missing
        def initialize(redis_host)
          fail "Please install gems 'redis' and 'connection_pool' to use this feature" unless defined?(::Redis) && defined?(ConnectionPool)

          @redis = ConnectionPool.new(size: 5, timeout: 5) { ::Redis.new(host: redis_host) }
        end

        # Looks the token string up in Redis and wraps it in an access token;
        # requests and stores a new token when Redis holds none.
        # A token rebuilt from Redis carries no expiry information; the TTL
        # set on the Redis key in #generate_token is what retires it.
        # @return [OAuth2::AccessToken]
        def token
          cached = nil
          @redis.with { |conn| cached = conn.get(redis_key) }
          return generate_token if cached.nil? || cached.empty?

          OAuth2::AccessToken.new(oauth_client, cached)
        end

        private

        # Requests a new token through the superclass and stores its string
        # in Redis with the token's lifetime as TTL.
        # @return the newly obtained token
        def generate_token
          # Use the value returned by super; calling #token here would look
          # into Redis again and recurse into this method.
          fresh = super
          ttl = fresh.expires_in.to_i
          # Never store a token without a value or a TTL: such an entry
          # would not expire from Redis.
          if ttl > 0 && !fresh.token.to_s.empty?
            @redis.with do |conn|
              conn.set(redis_key, fresh.token, ex: ttl, nx: true)
            end
          end
          fresh
        end

        # Redis key of the token. The consumer key is part of it so that
        # tokens belonging to different credentials do not overwrite each
        # other.
        def redis_key
          "epo_token_#{Epo::Ops.config.consumer_key}"
        end

        # OAuth2 client needed to rebuild an access token from its string.
        # NOTE(review): duplicates the client settings used by
        # TokenStore#generate_token, where the client is only a local
        # variable; keep both in sync.
        def oauth_client
          @oauth_client ||= OAuth2::Client.new(
            Epo::Ops.config.consumer_key,
            Epo::Ops.config.consumer_secret,
            site: 'https://ops.epo.org/',
            token_url: '/3.1/auth/accesstoken',
            raise_errors: false
          )
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
require 'date'

module Epo
  module Ops
    # Helpers for digging values out of nested EPO response data.
    class Util
      # Follows +path+ through the nested data, collecting at every step all
      # values found for the current path element.
      # @param epo_hash nested structure of hashes, arrays and strings
      # @param path [Array<String>] keys to follow, outermost first
      # @return [Array] flat list of everything found at the end of the path
      def self.find_in_data(epo_hash, path)
        path.reduce(epo_hash) { |layer, key| parse_hash_flat(layer, key) }
      end

      # Collects the values stored under +target+ in +hash_layer+. Arrays are
      # searched element by element. A hash-like layer that has no truthy
      # value for +target+ is searched through all of its values.
      # @param hash_layer nested structure to search
      # @param target key to look for
      # @return [Array] flattened list of matches; empty for nil, strings
      #   and any other value that is neither an array nor hash-like
      def self.parse_hash_flat(hash_layer, target)
        case hash_layer
        when nil, String
          []
        when Array
          hash_layer.flat_map { |element| parse_hash_flat(element, target) }
        else
          # Scalar leaves such as integers or booleans cannot contain the
          # target; indexing them with it would raise.
          return [] unless hash_layer.is_a?(Hash) || hash_layer.respond_to?(:to_h)

          match = hash_layer[target]
          return [match].flatten if match

          hash_layer.to_h.flat_map { |_key, value| parse_hash_flat(value, target) }
        end
      end

      # Parses a string of the form "YYYY/WW" (four-digit year, two-digit
      # week) into a date.
      # @param num [String, nil] the string to parse
      # @return [Date, nil] the date Date.commercial yields for that year and
      #   week, or nil if +num+ does not have the expected form
      # @raise [ArgumentError] if the week does not exist in that year
      def self.parse_change_gazette_num(num)
        # \A and \z anchor the whole string; ^ and $ would accept a match on
        # any single line of a multi-line input.
        parts = %r{\A(?<year>\d{4})/(?<week>\d{2})\z}.match(num)
        return nil if parts.nil?

        Date.commercial(Integer(parts[:year], 10), Integer(parts[:week], 10))
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module Epo
  module Ops
    # Version string of the epo-ops gem.
    VERSION = '0.1.5'.freeze
  end
end
metadata ADDED
@@ -0,0 +1,164 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: epo-ops
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.5
5
+ platform: ruby
6
+ authors:
7
+ - Max Kießling
8
+ - Robert Terbach
9
+ - Michael Prilop
10
+ autorequire:
11
+ bindir: exe
12
+ cert_chain: []
13
+ date: 2016-03-01 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: bundler
17
+ requirement: !ruby/object:Gem::Requirement
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.3'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ requirements:
26
+ - - ~>
27
+ - !ruby/object:Gem::Version
28
+ version: '1.3'
29
+ - !ruby/object:Gem::Dependency
30
+ name: rake
31
+ requirement: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - ~>
34
+ - !ruby/object:Gem::Version
35
+ version: '10.5'
36
+ type: :development
37
+ prerelease: false
38
+ version_requirements: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ~>
41
+ - !ruby/object:Gem::Version
42
+ version: '10.5'
43
+ - !ruby/object:Gem::Dependency
44
+ name: minitest
45
+ requirement: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ~>
48
+ - !ruby/object:Gem::Version
49
+ version: '5.8'
50
+ type: :development
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ~>
55
+ - !ruby/object:Gem::Version
56
+ version: '5.8'
57
+ - !ruby/object:Gem::Dependency
58
+ name: vcr
59
+ requirement: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ~>
62
+ - !ruby/object:Gem::Version
63
+ version: '2.9'
64
+ type: :development
65
+ prerelease: false
66
+ version_requirements: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ~>
69
+ - !ruby/object:Gem::Version
70
+ version: '2.9'
71
+ - !ruby/object:Gem::Dependency
72
+ name: webmock
73
+ requirement: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: '1.22'
78
+ type: :development
79
+ prerelease: false
80
+ version_requirements: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ~>
83
+ - !ruby/object:Gem::Version
84
+ version: '1.22'
85
+ - !ruby/object:Gem::Dependency
86
+ name: simplecov
87
+ requirement: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ type: :development
93
+ prerelease: false
94
+ version_requirements: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ - !ruby/object:Gem::Dependency
100
+ name: oauth2
101
+ requirement: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - ~>
104
+ - !ruby/object:Gem::Version
105
+ version: '1.1'
106
+ type: :runtime
107
+ prerelease: false
108
+ version_requirements: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ~>
111
+ - !ruby/object:Gem::Version
112
+ version: '1.1'
113
+ description: This gem allows simple access to the European Patent Offices (EPO) Open
114
+ Patent Services (OPS) using their XML-API
115
+ email:
116
+ executables: []
117
+ extensions: []
118
+ extra_rdoc_files: []
119
+ files:
120
+ - .gitignore
121
+ - .travis.yml
122
+ - Gemfile
123
+ - LICENSE
124
+ - README.md
125
+ - Rakefile
126
+ - epo-ops.gemspec
127
+ - lib/epo/ops.rb
128
+ - lib/epo/ops/address.rb
129
+ - lib/epo/ops/bibliographic_document.rb
130
+ - lib/epo/ops/client.rb
131
+ - lib/epo/ops/error.rb
132
+ - lib/epo/ops/limits.rb
133
+ - lib/epo/ops/logger.rb
134
+ - lib/epo/ops/rate_limit.rb
135
+ - lib/epo/ops/register.rb
136
+ - lib/epo/ops/search_query_builder.rb
137
+ - lib/epo/ops/token_store.rb
138
+ - lib/epo/ops/token_store/redis.rb
139
+ - lib/epo/ops/util.rb
140
+ - lib/epo/ops/version.rb
141
+ homepage: https://github.com/FHG-IMW/epo-ops
142
+ licenses: []
143
+ metadata: {}
144
+ post_install_message:
145
+ rdoc_options: []
146
+ require_paths:
147
+ - lib
148
+ required_ruby_version: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - '>='
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ required_rubygems_version: !ruby/object:Gem::Requirement
154
+ requirements:
155
+ - - '>='
156
+ - !ruby/object:Gem::Version
157
+ version: '0'
158
+ requirements: []
159
+ rubyforge_project:
160
+ rubygems_version: 2.4.6
161
+ signing_key:
162
+ specification_version: 4
163
+ summary: Ruby interface to the European Patent Office API (OPS)
164
+ test_files: []