wikimelon 0.0.2 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +5 -2
- data/CHANGELOG.md +24 -1
- data/Gemfile +1 -3
- data/README.md +92 -10
- data/lib/wikimelon/faraday.rb +5 -5
- data/lib/wikimelon/item.rb +9 -0
- data/lib/wikimelon/property.rb +9 -0
- data/lib/wikimelon/reference.rb +22 -0
- data/lib/wikimelon/request.rb +15 -3
- data/lib/wikimelon/resource.rb +53 -0
- data/lib/wikimelon/search_result.rb +21 -0
- data/lib/wikimelon/statement.rb +46 -0
- data/lib/wikimelon/throttle.rb +31 -0
- data/lib/wikimelon/version.rb +1 -1
- data/lib/wikimelon.rb +69 -2
- data/wikimelon.gemspec +5 -5
- metadata +36 -34
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 970d35750cfe012f2c2c05b004e6cf2b0bfcc9653e27762787a37d6d6f2b1dbb
|
|
4
|
+
data.tar.gz: 3d935812100ba31d1cdb0b25299e9a793f78cfec3b6f03aaa0f29659b43aaf18
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 4342be554828c2f669f9b9b2ef4006176ebbebb3e8a31c02cf7dd9d2caae70da229157665c188aad7ba9a8032dcbfb7aa8a7e80fe63028486f770f776d30805c
|
|
7
|
+
data.tar.gz: a23adc12cb3d590a3ea52a861f864c067548c4ca5cef22d19a8ebbf8a1bfc3fce6ca36a0565aaf4263858e737d39054fbe4bf9282f504f473e3a9b1fd64fd17f
|
data/.github/workflows/main.yml
CHANGED
|
@@ -5,12 +5,15 @@ on: [push,pull_request]
|
|
|
5
5
|
jobs:
|
|
6
6
|
build:
|
|
7
7
|
runs-on: ubuntu-latest
|
|
8
|
+
strategy:
|
|
9
|
+
matrix:
|
|
10
|
+
ruby-version: ['3.3.4', '4.0.0']
|
|
8
11
|
steps:
|
|
9
12
|
- uses: actions/checkout@v2
|
|
10
|
-
- name: Set up Ruby
|
|
13
|
+
- name: Set up Ruby ${{ matrix.ruby-version }}
|
|
11
14
|
uses: ruby/setup-ruby@v1
|
|
12
15
|
with:
|
|
13
|
-
ruby-version:
|
|
16
|
+
ruby-version: ${{ matrix.ruby-version }}
|
|
14
17
|
bundler-cache: true
|
|
15
18
|
- name: Run the default task
|
|
16
19
|
run: bundle exec rake
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,28 @@
|
|
|
1
1
|
## [Unreleased]
|
|
2
|
-
|
|
2
|
+
|
|
3
|
+
## [0.1.1] - 2026-06-17
|
|
4
|
+
- Allow Ruby 4.0.0:
|
|
5
|
+
- Relaxed `required_ruby_version` to `>= 2.7.0, < 5.0`
|
|
6
|
+
- Added Ruby 4.0.0 to the CI matrix
|
|
7
|
+
- Bumped `faraday-follow_redirects` upper bound to allow 0.5+ (which lifts the Ruby < 4 cap)
|
|
8
|
+
- Added `rexml` and `irb` development dependencies (no longer in stdlib on Ruby 4)
|
|
9
|
+
- Fixed copy-paste residue: 503 error message said "Crossref is rate limiting your requests." (Crossref is the upstream serrano template's API) — changed to "Wikidata is rate limiting your requests."
|
|
10
|
+
- Switched `test_find_and_label` and `test_item_find_many_preserves_order` from Q42 (Douglas Adams) to Q1/Q5 because Q42's English label was vandalized on Wikidata
|
|
11
|
+
|
|
12
|
+
## [0.1.0] - 2026-05-04
|
|
13
|
+
- Added `Wikimelon.exists?` to check whether a P/Q ID resolves to an entity at the exact requested ID (catches missing IDs and merge/redirect cases)
|
|
14
|
+
- Added `Wikimelon::Item` and `Wikimelon::Property` wrappers with helpers for `label`, `description`, `aliases`, `sitelink`, `claim(s)`, and `datatype`
|
|
15
|
+
- Added `Wikimelon::Statement` for unwrapping claim values (entity-id, time, string, external-id, monolingualtext, quantity)
|
|
16
|
+
- Added `Wikimelon.default_language` config (defaults to `"en"`)
|
|
17
|
+
- Added `Wikimelon.request_interval` config for client-side throttling between requests
|
|
18
|
+
- Added retry-on-429/503 with exponential backoff via `faraday-retry`, configurable through `Wikimelon.retry_max` (disabled by default) and `Wikimelon.retry_interval`; honors `Retry-After` headers
|
|
19
|
+
- Added `revision_id:` keyword to `Item.find` / `Property.find`
|
|
20
|
+
- Added `Wikimelon.api_url` and `Wikimelon.sparql_url` configs for pointing at self-hosted Wikibase instances
|
|
21
|
+
- Added `Item.search` / `Property.search` (fuzzy search by label/alias via `wbsearchentities`) returning `Wikimelon::SearchResult` objects
|
|
22
|
+
- Added `Item.find_many` / `Property.find_many` for batch entity fetch via `wbgetentities` (auto-chunks to the API's 50-per-request limit)
|
|
23
|
+
- Dropped the `multi_json` runtime dependency in favor of stdlib `JSON`
|
|
24
|
+
- Bumped the minimum Ruby version to 2.7
|
|
25
|
+
|
|
3
26
|
## [0.0.2] - 2025-03-06
|
|
4
27
|
- Added entity endpoint
|
|
5
28
|
|
data/Gemfile
CHANGED
data/README.md
CHANGED
|
@@ -36,23 +36,105 @@ Wikimelon.query(query) # => MultiJson object
|
|
|
36
36
|
```
|
|
37
37
|
|
|
38
38
|
---
|
|
39
|
-
###
|
|
40
|
-
Fetch
|
|
39
|
+
### Items
|
|
40
|
+
Fetch a Wikidata item (`Q…`). Returns a `Wikimelon::Item` with helpers for labels, descriptions, aliases, sitelinks, and claims:
|
|
41
41
|
```ruby
|
|
42
|
-
|
|
43
|
-
|
|
42
|
+
item = Wikimelon::Item.find("Q42")
|
|
43
|
+
|
|
44
|
+
item.id # => "Q42"
|
|
45
|
+
item.exists? # => true
|
|
46
|
+
item.label # => "Douglas Adams"
|
|
47
|
+
item.label("de") # => "Douglas Adams"
|
|
48
|
+
item.description("en") # => "English author and humourist (1952–2001)"
|
|
49
|
+
item.aliases("en") # => ["Douglas Noël Adams", ...]
|
|
50
|
+
item.sitelink("enwiki") # => "Douglas Adams"
|
|
51
|
+
|
|
52
|
+
# Claims are returned as Wikimelon::Statement objects.
|
|
53
|
+
# A property can have multiple claims (sometimes contradictory), each carrying
|
|
54
|
+
# a rank: "preferred", "normal", or "deprecated". The caller decides which to use.
|
|
55
|
+
item.claims("P31").map(&:value) # => ["Q5"] (instance of: human)
|
|
56
|
+
item.claims("P569").first.value # => "+1952-03-11T00:00:00Z" (date of birth)
|
|
57
|
+
item.claims("P106").map(&:value) # => ["Q36180", "Q28389", ...] (occupations)
|
|
58
|
+
|
|
59
|
+
# When a property has multi-rank statements, filter explicitly:
|
|
60
|
+
france = Wikimelon::Item.find("Q142")
|
|
61
|
+
france.claims("P35").find { |s| s.rank == "preferred" }.value # current head of state
|
|
62
|
+
|
|
63
|
+
# Each statement carries the references that cite it.
|
|
64
|
+
# A reference is a bag of snaks (property → value) pointing at a source.
|
|
65
|
+
stmt = item.claims("P31").first
|
|
66
|
+
ref = stmt.references.first
|
|
67
|
+
ref.properties # => ["P248", "P268", "P407", "P813"]
|
|
68
|
+
ref.snaks("P248").first.value # => "Q19938912" (BnF authorities)
|
|
69
|
+
|
|
70
|
+
# Escape hatch — the raw JSON Hash is still available
|
|
71
|
+
item.raw # => { "entities" => { "Q42" => {...} } }
|
|
44
72
|
```
|
|
45
73
|
|
|
46
|
-
Fetch
|
|
74
|
+
Fetch a specific revision:
|
|
47
75
|
```ruby
|
|
48
|
-
|
|
49
|
-
Wikimelon.entity(entity_id) # => MultiJson object
|
|
76
|
+
Wikimelon::Item.find("Q13", revision_id: 109)
|
|
50
77
|
```
|
|
51
78
|
|
|
52
|
-
|
|
79
|
+
`item.exists?` returns `false` for missing IDs and for IDs that have been merged or redirected (in a redirect, the API returns the *target* entity, whose `id` differs from the requested string):
|
|
53
80
|
```ruby
|
|
54
|
-
|
|
55
|
-
Wikimelon.
|
|
81
|
+
Wikimelon::Item.find("Q52793654").exists? # => false (redirects to Q336)
|
|
82
|
+
Wikimelon::Item.find("Q1000000000").exists? # => false (well-formed but unassigned)
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
### Properties
|
|
87
|
+
Fetch a Wikidata property (`P…`). Returns a `Wikimelon::Property` with all the `Item` helpers plus `#datatype`:
|
|
88
|
+
```ruby
|
|
89
|
+
prop = Wikimelon::Property.find("P12817")
|
|
90
|
+
|
|
91
|
+
prop.label # => "Cockroach Species File taxon ID (new)"
|
|
92
|
+
prop.datatype # => "external-id"
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
### Low-level access
|
|
97
|
+
For most cases prefer `Item.find` / `Property.find` above. The raw endpoint is exposed for advanced use (custom JSON processing, pre-flighting an ID before constructing a wrapper):
|
|
98
|
+
|
|
99
|
+
```ruby
|
|
100
|
+
Wikimelon.entity("Q13") # => raw Hash
|
|
101
|
+
Wikimelon.entity("Q13", revision_id: 109) # => raw Hash at revision
|
|
102
|
+
|
|
103
|
+
Wikimelon.exists?("Q42") # => true
|
|
104
|
+
Wikimelon.exists?("Q52793654") # => false (redirects to Q336)
|
|
105
|
+
Wikimelon.exists?("Q1000000000") # => false (well-formed but unassigned)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
### Configuration
|
|
110
|
+
Set a default language for `label`, `description`, and `aliases` (defaults to `"en"`):
|
|
111
|
+
```ruby
|
|
112
|
+
Wikimelon.default_language = "es"
|
|
113
|
+
|
|
114
|
+
spain = Wikimelon::Item.find("Q29")
|
|
115
|
+
spain.label # => "España" (uses the configured default)
|
|
116
|
+
spain.label("en") # => "Spain" (explicit override still works)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
Throttle outgoing requests by setting a minimum interval between them in seconds (defaults to `0`, no throttling). Useful when running batch jobs against Wikidata to stay polite:
|
|
120
|
+
```ruby
|
|
121
|
+
Wikimelon.request_interval = 0.5 # at most 2 requests/second
|
|
122
|
+
|
|
123
|
+
# Subsequent calls will sleep as needed to enforce the gap
|
|
124
|
+
ids.each { |id| Wikimelon::Item.find(id) }
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Retry on transient errors (`429 Too Many Requests`, `503 Service Unavailable`) with exponential backoff. **Disabled by default** because the sleeps can stall interactive UI requests for many seconds — opt in for batch/CLI work where latency doesn't matter:
|
|
128
|
+
```ruby
|
|
129
|
+
Wikimelon.retry_max = 5 # number of retries (0 = disabled)
|
|
130
|
+
Wikimelon.retry_interval = 0.5 # base wait in seconds; doubles each retry
|
|
131
|
+
```
|
|
132
|
+
When the server provides a `Retry-After` header (Wikidata's SPARQL endpoint reliably does on 429), that value is honored instead of the exponential calculation.
|
|
133
|
+
|
|
134
|
+
Point Wikimelon at a self-hosted Wikibase instance or query service:
|
|
135
|
+
```ruby
|
|
136
|
+
Wikimelon.api_url = "https://wikibase.example.org" # Special:EntityData host
|
|
137
|
+
Wikimelon.sparql_url = "https://wdqs.example.org/sparql" # SPARQL endpoint
|
|
56
138
|
```
|
|
57
139
|
|
|
58
140
|
---
|
data/lib/wikimelon/faraday.rb
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "faraday"
|
|
4
|
-
require "
|
|
4
|
+
require "json"
|
|
5
5
|
|
|
6
6
|
# @private
|
|
7
7
|
module Faraday
|
|
@@ -20,7 +20,7 @@ module Faraday
|
|
|
20
20
|
when 502
|
|
21
21
|
raise Wikimelon::BadGateway, error_message_500(response, "The server returned an invalid or incomplete response.")
|
|
22
22
|
when 503
|
|
23
|
-
raise Wikimelon::ServiceUnavailable, error_message_500(response, "
|
|
23
|
+
raise Wikimelon::ServiceUnavailable, error_message_500(response, "Wikidata is rate limiting your requests.")
|
|
24
24
|
when 504
|
|
25
25
|
raise Wikimelon::GatewayTimeout, error_message_500(response, "504 Gateway Time-out")
|
|
26
26
|
end
|
|
@@ -41,7 +41,7 @@ module Faraday
|
|
|
41
41
|
def error_body(body)
|
|
42
42
|
if !body.nil? && !body.empty? && body.is_a?(String)
|
|
43
43
|
if json?(body)
|
|
44
|
-
body = ::
|
|
44
|
+
body = ::JSON.parse(body)
|
|
45
45
|
if body["message"].nil?
|
|
46
46
|
body = nil
|
|
47
47
|
else
|
|
@@ -62,9 +62,9 @@ module Faraday
|
|
|
62
62
|
end
|
|
63
63
|
|
|
64
64
|
def json?(string)
|
|
65
|
-
|
|
65
|
+
JSON.parse(string)
|
|
66
66
|
true
|
|
67
|
-
rescue
|
|
67
|
+
rescue JSON::ParserError
|
|
68
68
|
false
|
|
69
69
|
end
|
|
70
70
|
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Wikimelon
|
|
4
|
+
# A citation attached to a Statement. Holds a bag of snaks keyed by property,
|
|
5
|
+
# commonly P248 ("stated in"), P854 ("reference URL"), P813 ("retrieved"),
|
|
6
|
+
# P1476 ("title"), P143 ("imported from").
|
|
7
|
+
class Reference
|
|
8
|
+
attr_reader :raw
|
|
9
|
+
|
|
10
|
+
def initialize(raw)
|
|
11
|
+
@raw = raw
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def snaks(property_id)
|
|
15
|
+
(@raw.dig('snaks', property_id) || []).map { |s| Statement.new('mainsnak' => s) }
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def properties
|
|
19
|
+
@raw['snaks-order'] || (@raw['snaks'] || {}).keys
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
data/lib/wikimelon/request.rb
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
require_relative "faraday" # !! Potential ruby 3.0 difference in module loading? relative differs from Serrano
|
|
2
2
|
require "faraday/follow_redirects"
|
|
3
|
+
require "faraday/retry"
|
|
4
|
+
require "json"
|
|
3
5
|
require_relative "utils"
|
|
4
6
|
|
|
5
7
|
module Wikimelon
|
|
@@ -15,6 +17,7 @@ module Wikimelon
|
|
|
15
17
|
@url = args[:url]
|
|
16
18
|
@verbose = args[:verbose]
|
|
17
19
|
@query = args[:query]
|
|
20
|
+
@params = args[:params]
|
|
18
21
|
@limit = args[:limit]
|
|
19
22
|
@offset = args[:offset]
|
|
20
23
|
@options = args[:options] # TODO: not added at wikimelon.rb
|
|
@@ -22,13 +25,21 @@ module Wikimelon
|
|
|
22
25
|
|
|
23
26
|
def perform
|
|
24
27
|
|
|
25
|
-
|
|
26
|
-
opts = args.delete_if { |_k, v| v.nil? }
|
|
28
|
+
opts = @params || {'query': @query, 'format': 'json'}.delete_if { |_k, v| v.nil? }
|
|
27
29
|
|
|
28
30
|
Faraday::Utils.default_space_encoding = "+"
|
|
29
31
|
|
|
32
|
+
retry_max = Wikimelon.retry_max.to_i
|
|
33
|
+
|
|
30
34
|
conn = Faraday.new(url: @url) do |f|
|
|
31
35
|
f.response :logger if verbose
|
|
36
|
+
if retry_max > 0
|
|
37
|
+
f.request :retry,
|
|
38
|
+
max: retry_max,
|
|
39
|
+
interval: Wikimelon.retry_interval.to_f,
|
|
40
|
+
backoff_factor: 2,
|
|
41
|
+
retry_statuses: [429, 503]
|
|
42
|
+
end
|
|
32
43
|
f.use Faraday::WikimelonErrors::Middleware
|
|
33
44
|
f.adapter Faraday.default_adapter
|
|
34
45
|
end
|
|
@@ -37,9 +48,10 @@ module Wikimelon
|
|
|
37
48
|
conn.headers[:user_agent] = make_user_agent
|
|
38
49
|
conn.headers["X-USER-AGENT"] = make_user_agent
|
|
39
50
|
|
|
51
|
+
Wikimelon::Throttle.wait!
|
|
40
52
|
res = conn.get(endpoint, opts)
|
|
41
53
|
|
|
42
|
-
|
|
54
|
+
JSON.parse(res.body)
|
|
43
55
|
end
|
|
44
56
|
end
|
|
45
57
|
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Wikimelon
|
|
4
|
+
class Resource
|
|
5
|
+
attr_reader :id, :raw
|
|
6
|
+
|
|
7
|
+
def self.find(id, revision_id: nil)
|
|
8
|
+
new(id, Wikimelon.entity(id, revision_id: revision_id))
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
# Fetch multiple resources by ID in batches of up to 50 (the wbgetentities
|
|
12
|
+
# hard limit). Returns wrapped resources in the same order as the input.
|
|
13
|
+
def self.find_many(ids)
|
|
14
|
+
ids.each_slice(50).flat_map do |batch|
|
|
15
|
+
data = Wikimelon.entities(batch)
|
|
16
|
+
batch.map { |id| new(id, data) }
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
# Fuzzy-search by label or alias. Returns Array<SearchResult>.
|
|
21
|
+
def self.search(query, language: nil, limit: 10)
|
|
22
|
+
type = self == Property ? 'property' : 'item'
|
|
23
|
+
res = Wikimelon.search(query, type: type, language: language, limit: limit)
|
|
24
|
+
(res['search'] || []).map { |hit| SearchResult.new(hit) }
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def initialize(id, data)
|
|
28
|
+
@id = id
|
|
29
|
+
@raw = data
|
|
30
|
+
@entity = data.dig('entities', id)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def exists?
|
|
34
|
+
!@entity.nil? && @entity['id'] == @id
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def label(lang = nil)
|
|
38
|
+
@entity&.dig('labels', lang || Wikimelon.default_language, 'value')
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def description(lang = nil)
|
|
42
|
+
@entity&.dig('descriptions', lang || Wikimelon.default_language, 'value')
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def aliases(lang = nil)
|
|
46
|
+
(@entity&.dig('aliases', lang || Wikimelon.default_language) || []).map { |a| a['value'] }
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def claims(property_id)
|
|
50
|
+
(@entity&.dig('claims', property_id) || []).map { |c| Statement.new(c) }
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Wikimelon
|
|
4
|
+
# A single hit from wbsearchentities. Lighter than a full Resource —
|
|
5
|
+
# only the fields the search endpoint returns. Call Item.find(result.id)
|
|
6
|
+
# to hydrate the full entity.
|
|
7
|
+
class SearchResult
|
|
8
|
+
attr_reader :raw
|
|
9
|
+
|
|
10
|
+
def initialize(raw)
|
|
11
|
+
@raw = raw
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def id = @raw['id']
|
|
15
|
+
def label = @raw['label']
|
|
16
|
+
def description = @raw['description']
|
|
17
|
+
def aliases = @raw['aliases'] || []
|
|
18
|
+
def concept_uri = @raw['concepturi']
|
|
19
|
+
def url = @raw['url']
|
|
20
|
+
end
|
|
21
|
+
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Wikimelon
|
|
4
|
+
class Statement
|
|
5
|
+
attr_reader :raw
|
|
6
|
+
|
|
7
|
+
def initialize(raw)
|
|
8
|
+
@raw = raw
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def rank
|
|
12
|
+
@raw['rank']
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def property
|
|
16
|
+
@raw.dig('mainsnak', 'property')
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def value
|
|
20
|
+
snak_value(@raw['mainsnak'])
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def qualifiers(property_id)
|
|
24
|
+
(@raw.dig('qualifiers', property_id) || []).map { |q| Statement.new('mainsnak' => q) }
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def references
|
|
28
|
+
(@raw['references'] || []).map { |r| Reference.new(r) }
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
private
|
|
32
|
+
|
|
33
|
+
def snak_value(snak)
|
|
34
|
+
return nil if snak.nil? || snak['snaktype'] != 'value'
|
|
35
|
+
dv = snak['datavalue'] or return nil
|
|
36
|
+
case dv['type']
|
|
37
|
+
when 'wikibase-entityid' then dv.dig('value', 'id')
|
|
38
|
+
when 'time' then dv.dig('value', 'time')
|
|
39
|
+
when 'monolingualtext' then dv.dig('value', 'text')
|
|
40
|
+
when 'quantity' then dv.dig('value', 'amount')
|
|
41
|
+
when 'string', 'external-id' then dv['value']
|
|
42
|
+
else dv['value']
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Wikimelon
|
|
4
|
+
# Enforces a minimum interval between outgoing requests when
|
|
5
|
+
# Wikimelon.request_interval is set (in seconds). Thread-safe.
|
|
6
|
+
module Throttle
|
|
7
|
+
@mutex = Mutex.new
|
|
8
|
+
@last_request_at = nil
|
|
9
|
+
|
|
10
|
+
def self.wait!
|
|
11
|
+
interval = Wikimelon.request_interval.to_f
|
|
12
|
+
return if interval <= 0
|
|
13
|
+
|
|
14
|
+
@mutex.synchronize do
|
|
15
|
+
if @last_request_at
|
|
16
|
+
elapsed = now - @last_request_at
|
|
17
|
+
sleep(interval - elapsed) if elapsed < interval
|
|
18
|
+
end
|
|
19
|
+
@last_request_at = now
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def self.reset!
|
|
24
|
+
@mutex.synchronize { @last_request_at = nil }
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def self.now
|
|
28
|
+
Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
data/lib/wikimelon/version.rb
CHANGED
data/lib/wikimelon.rb
CHANGED
|
@@ -10,6 +10,12 @@ module Wikimelon
|
|
|
10
10
|
extend Configuration
|
|
11
11
|
|
|
12
12
|
define_setting :mailto, ENV["WIKIMELON_API_EMAIL"]
|
|
13
|
+
define_setting :default_language, 'en'
|
|
14
|
+
define_setting :request_interval, 0
|
|
15
|
+
define_setting :retry_max, 0
|
|
16
|
+
define_setting :retry_interval, 0.5
|
|
17
|
+
define_setting :api_url, 'https://www.wikidata.org'
|
|
18
|
+
define_setting :sparql_url, 'https://query.wikidata.org/sparql'
|
|
13
19
|
|
|
14
20
|
# Run a Wikidata SPARQL query
|
|
15
21
|
#
|
|
@@ -20,7 +26,7 @@ module Wikimelon
|
|
|
20
26
|
# @return [Array, Boolean] An array of hashes
|
|
21
27
|
def self.query(query, verbose: false)
|
|
22
28
|
Request.new(
|
|
23
|
-
url:
|
|
29
|
+
url: sparql_url,
|
|
24
30
|
query: query,
|
|
25
31
|
verbose: verbose
|
|
26
32
|
).perform
|
|
@@ -36,11 +42,72 @@ module Wikimelon
|
|
|
36
42
|
#
|
|
37
43
|
# @return [Array, Boolean] An array of hashes
|
|
38
44
|
def self.entity(entity_id, revision_id: nil, verbose: false)
|
|
39
|
-
url = "
|
|
45
|
+
url = "#{api_url}/wiki/Special:EntityData/#{entity_id}.json"
|
|
40
46
|
url = "#{url}?revision=#{revision_id}" unless revision_id.nil?
|
|
41
47
|
Request.new(
|
|
42
48
|
url: url,
|
|
43
49
|
verbose: verbose
|
|
44
50
|
).perform
|
|
45
51
|
end
|
|
52
|
+
|
|
53
|
+
# Check whether a Wikidata entity exists at the exact ID requested.
|
|
54
|
+
# Returns false for missing IDs and for IDs that have been merged or
|
|
55
|
+
# redirected (the API returns the target entity, whose id differs from
|
|
56
|
+
# the requested string).
|
|
57
|
+
#
|
|
58
|
+
# @param entity_id [String] a Wikidata entity ID
|
|
59
|
+
# @param verbose [Boolean] Print headers to STDOUT
|
|
60
|
+
#
|
|
61
|
+
# @return [Boolean]
|
|
62
|
+
def self.exists?(entity_id, verbose: false)
|
|
63
|
+
res = entity(entity_id, verbose: verbose)
|
|
64
|
+
res.dig('entities', entity_id, 'id') == entity_id
|
|
65
|
+
rescue Wikimelon::NotFound
|
|
66
|
+
false
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
# Fetch multiple Wikidata entities in a single request via wbgetentities.
|
|
70
|
+
# Up to 50 IDs per call (the API hard limit). Use Item.find_many or
|
|
71
|
+
# Property.find_many for the wrapped-object form.
|
|
72
|
+
#
|
|
73
|
+
# @param ids [Array<String>]
|
|
74
|
+
# @return [Hash] the raw {"entities" => {...}} response
|
|
75
|
+
def self.entities(ids, verbose: false)
|
|
76
|
+
Request.new(
|
|
77
|
+
url: "#{api_url}/w/api.php",
|
|
78
|
+
params: { action: 'wbgetentities', ids: ids.join('|'), format: 'json' },
|
|
79
|
+
verbose: verbose
|
|
80
|
+
).perform
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
# Fuzzy-search Wikidata for an item, property, or other entity type
|
|
84
|
+
# via wbsearchentities. Returns the raw response hash; use
|
|
85
|
+
# Item.search / Property.search for the wrapped form.
|
|
86
|
+
#
|
|
87
|
+
# @param query [String] free-text search
|
|
88
|
+
# @param type [String] "item", "property", "lexeme", "form", "sense"
|
|
89
|
+
# @param language [String] language code; defaults to Wikimelon.default_language
|
|
90
|
+
# @param limit [Integer] 1-50
|
|
91
|
+
def self.search(query, type: 'item', language: nil, limit: 10, verbose: false)
|
|
92
|
+
Request.new(
|
|
93
|
+
url: "#{api_url}/w/api.php",
|
|
94
|
+
params: {
|
|
95
|
+
action: 'wbsearchentities',
|
|
96
|
+
search: query,
|
|
97
|
+
language: language || default_language,
|
|
98
|
+
type: type,
|
|
99
|
+
limit: limit,
|
|
100
|
+
format: 'json'
|
|
101
|
+
},
|
|
102
|
+
verbose: verbose
|
|
103
|
+
).perform
|
|
104
|
+
end
|
|
46
105
|
end
|
|
106
|
+
|
|
107
|
+
require_relative "wikimelon/throttle"
|
|
108
|
+
require_relative "wikimelon/reference"
|
|
109
|
+
require_relative "wikimelon/statement"
|
|
110
|
+
require_relative "wikimelon/search_result"
|
|
111
|
+
require_relative "wikimelon/resource"
|
|
112
|
+
require_relative "wikimelon/item"
|
|
113
|
+
require_relative "wikimelon/property"
|
data/wikimelon.gemspec
CHANGED
|
@@ -5,14 +5,14 @@ require_relative "lib/wikimelon/version"
|
|
|
5
5
|
Gem::Specification.new do |s|
|
|
6
6
|
s.name = "wikimelon"
|
|
7
7
|
s.version = Wikimelon::VERSION
|
|
8
|
-
s.authors = ["Geoff Ower, Matt Yoder"]
|
|
8
|
+
s.authors = ["Geoff Ower", "Matt Yoder"]
|
|
9
9
|
s.email = ["gdower@illinois.edu"]
|
|
10
10
|
|
|
11
11
|
s.summary = "Wikidata Client"
|
|
12
12
|
s.description = "Wikimelon is a low-level wrapper around the Wikidata API."
|
|
13
13
|
s.homepage = "https://github.com/SpeciesFileGroup/wikimelon"
|
|
14
14
|
s.license = "MIT"
|
|
15
|
-
s.required_ruby_version = ">= 2.5.0"
|
|
15
|
+
s.required_ruby_version = [">= 2.7.0", "< 5.0"]
|
|
16
16
|
|
|
17
17
|
# s.metadata["allowed_push_host"] = "TODO: Set to 'https://mygemserver.com'"
|
|
18
18
|
|
|
@@ -32,17 +32,17 @@ Gem::Specification.new do |s|
|
|
|
32
32
|
# Uncomment to register a new dependency of your gem
|
|
33
33
|
# s.add_dependency "example-gem", "~> 1.0"
|
|
34
34
|
|
|
35
|
-
s.add_development_dependency "bundler", "~> 2.1", ">= 2.1.4"
|
|
36
35
|
s.add_development_dependency "rake", "~> 13.0", ">= 13.0.1"
|
|
37
36
|
s.add_development_dependency "test-unit", "~> 3.3", ">= 3.3.6"
|
|
38
37
|
s.add_development_dependency "vcr", "~> 6.0"
|
|
39
38
|
s.add_development_dependency "webmock", "~> 3.18"
|
|
40
39
|
s.add_development_dependency "rexml", "~> 3.3", ">= 3.3.6"
|
|
40
|
+
s.add_development_dependency "irb", "~> 1.0"
|
|
41
41
|
# s.add_development_dependency "byebug"
|
|
42
42
|
|
|
43
43
|
s.add_runtime_dependency "faraday", "~> 2.2"
|
|
44
|
-
s.add_runtime_dependency "faraday-follow_redirects", ">= 0.1", "< 0.
|
|
45
|
-
s.add_runtime_dependency "
|
|
44
|
+
s.add_runtime_dependency "faraday-follow_redirects", ">= 0.1", "< 0.6"
|
|
45
|
+
s.add_runtime_dependency "faraday-retry", "~> 2.2"
|
|
46
46
|
|
|
47
47
|
# s.add_runtime_dependency "thor", "~> 1.0", ">= 1.0.1"
|
|
48
48
|
|
metadata
CHANGED
|
@@ -1,35 +1,15 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: wikimelon
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
|
-
- Geoff Ower
|
|
8
|
-
|
|
7
|
+
- Geoff Ower
|
|
8
|
+
- Matt Yoder
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
|
-
- !ruby/object:Gem::Dependency
|
|
14
|
-
name: bundler
|
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
|
16
|
-
requirements:
|
|
17
|
-
- - "~>"
|
|
18
|
-
- !ruby/object:Gem::Version
|
|
19
|
-
version: '2.1'
|
|
20
|
-
- - ">="
|
|
21
|
-
- !ruby/object:Gem::Version
|
|
22
|
-
version: 2.1.4
|
|
23
|
-
type: :development
|
|
24
|
-
prerelease: false
|
|
25
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
26
|
-
requirements:
|
|
27
|
-
- - "~>"
|
|
28
|
-
- !ruby/object:Gem::Version
|
|
29
|
-
version: '2.1'
|
|
30
|
-
- - ">="
|
|
31
|
-
- !ruby/object:Gem::Version
|
|
32
|
-
version: 2.1.4
|
|
33
13
|
- !ruby/object:Gem::Dependency
|
|
34
14
|
name: rake
|
|
35
15
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -118,6 +98,20 @@ dependencies:
|
|
|
118
98
|
- - ">="
|
|
119
99
|
- !ruby/object:Gem::Version
|
|
120
100
|
version: 3.3.6
|
|
101
|
+
- !ruby/object:Gem::Dependency
|
|
102
|
+
name: irb
|
|
103
|
+
requirement: !ruby/object:Gem::Requirement
|
|
104
|
+
requirements:
|
|
105
|
+
- - "~>"
|
|
106
|
+
- !ruby/object:Gem::Version
|
|
107
|
+
version: '1.0'
|
|
108
|
+
type: :development
|
|
109
|
+
prerelease: false
|
|
110
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
111
|
+
requirements:
|
|
112
|
+
- - "~>"
|
|
113
|
+
- !ruby/object:Gem::Version
|
|
114
|
+
version: '1.0'
|
|
121
115
|
- !ruby/object:Gem::Dependency
|
|
122
116
|
name: faraday
|
|
123
117
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -141,7 +135,7 @@ dependencies:
|
|
|
141
135
|
version: '0.1'
|
|
142
136
|
- - "<"
|
|
143
137
|
- !ruby/object:Gem::Version
|
|
144
|
-
version: '0.
|
|
138
|
+
version: '0.6'
|
|
145
139
|
type: :runtime
|
|
146
140
|
prerelease: false
|
|
147
141
|
version_requirements: !ruby/object:Gem::Requirement
|
|
@@ -151,21 +145,21 @@ dependencies:
|
|
|
151
145
|
version: '0.1'
|
|
152
146
|
- - "<"
|
|
153
147
|
- !ruby/object:Gem::Version
|
|
154
|
-
version: '0.
|
|
148
|
+
version: '0.6'
|
|
155
149
|
- !ruby/object:Gem::Dependency
|
|
156
|
-
name:
|
|
150
|
+
name: faraday-retry
|
|
157
151
|
requirement: !ruby/object:Gem::Requirement
|
|
158
152
|
requirements:
|
|
159
153
|
- - "~>"
|
|
160
154
|
- !ruby/object:Gem::Version
|
|
161
|
-
version: '
|
|
155
|
+
version: '2.2'
|
|
162
156
|
type: :runtime
|
|
163
157
|
prerelease: false
|
|
164
158
|
version_requirements: !ruby/object:Gem::Requirement
|
|
165
159
|
requirements:
|
|
166
160
|
- - "~>"
|
|
167
161
|
- !ruby/object:Gem::Version
|
|
168
|
-
version: '
|
|
162
|
+
version: '2.2'
|
|
169
163
|
description: Wikimelon is a low-level wrapper around the Wikidata API.
|
|
170
164
|
email:
|
|
171
165
|
- gdower@illinois.edu
|
|
@@ -187,7 +181,14 @@ files:
|
|
|
187
181
|
- lib/wikimelon/error.rb
|
|
188
182
|
- lib/wikimelon/faraday.rb
|
|
189
183
|
- lib/wikimelon/helpers/configuration.rb
|
|
184
|
+
- lib/wikimelon/item.rb
|
|
185
|
+
- lib/wikimelon/property.rb
|
|
186
|
+
- lib/wikimelon/reference.rb
|
|
190
187
|
- lib/wikimelon/request.rb
|
|
188
|
+
- lib/wikimelon/resource.rb
|
|
189
|
+
- lib/wikimelon/search_result.rb
|
|
190
|
+
- lib/wikimelon/statement.rb
|
|
191
|
+
- lib/wikimelon/throttle.rb
|
|
191
192
|
- lib/wikimelon/utils.rb
|
|
192
193
|
- lib/wikimelon/version.rb
|
|
193
194
|
- wikimelon.gemspec
|
|
@@ -197,8 +198,7 @@ licenses:
|
|
|
197
198
|
metadata:
|
|
198
199
|
homepage_uri: https://github.com/SpeciesFileGroup/wikimelon
|
|
199
200
|
source_code_uri: https://github.com/SpeciesFileGroup/wikimelon
|
|
200
|
-
changelog_uri: https://github.com/SpeciesFileGroup/wikimelon/releases/tag/v0.
|
|
201
|
-
post_install_message:
|
|
201
|
+
changelog_uri: https://github.com/SpeciesFileGroup/wikimelon/releases/tag/v0.1.1
|
|
202
202
|
rdoc_options: []
|
|
203
203
|
require_paths:
|
|
204
204
|
- lib
|
|
@@ -206,15 +206,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
206
206
|
requirements:
|
|
207
207
|
- - ">="
|
|
208
208
|
- !ruby/object:Gem::Version
|
|
209
|
-
version: 2.
|
|
209
|
+
version: 2.7.0
|
|
210
|
+
- - "<"
|
|
211
|
+
- !ruby/object:Gem::Version
|
|
212
|
+
version: '5.0'
|
|
210
213
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
211
214
|
requirements:
|
|
212
215
|
- - ">="
|
|
213
216
|
- !ruby/object:Gem::Version
|
|
214
217
|
version: '0'
|
|
215
218
|
requirements: []
|
|
216
|
-
rubygems_version:
|
|
217
|
-
signing_key:
|
|
219
|
+
rubygems_version: 4.0.3
|
|
218
220
|
specification_version: 4
|
|
219
221
|
summary: Wikidata Client
|
|
220
222
|
test_files: []
|