gared 0.0.22 → 0.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/gared.rb +1 -0
- data/lib/gared/jpress.rb +0 -31
- data/lib/gared/nli_api.rb +89 -0
- data/test/test_gared.rb +16 -0
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cd1e7f535a0650e0251fec57b3572e49dda88b189b96fe1d1b630acc5e147bb4
|
4
|
+
data.tar.gz: b606f6169bdf2cac88f5d29cec6bf4e5464a639bc73b655a24c61aecf7593cd1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b5c082ab85a8a7fb4cf5c23af7ebc35584751ebcaf88defc1b4f8c8d808d5cb3ff5d213db4b07687bfae49fda11b12f6963c9c6d2bd2e6b1d2e5edcc487e6e4b
|
7
|
+
data.tar.gz: a4415463242669077a504252094c2e93f63469fe6fcd4faeeee7a5cceb852b5325608dd5255991c79f04d725856ac61296023301c39af0a4f48288dbe1ddad70
|
data/lib/gared.rb
CHANGED
data/lib/gared/jpress.rb
CHANGED
@@ -1,35 +1,4 @@
|
|
1
1
|
module Gared
|
2
2
|
class Jpress
|
3
|
-
require 'watir'
|
4
|
-
|
5
|
-
def initialize
|
6
|
-
@browser = Watir::Browser.new :chrome, options: {args: ['--no-sandbox', '--headless']}
|
7
|
-
end
|
8
|
-
|
9
|
-
def query_persons(q)
|
10
|
-
end
|
11
|
-
|
12
|
-
def query_person(person)
|
13
|
-
end
|
14
|
-
|
15
|
-
def query_publications(q)
|
16
|
-
end
|
17
|
-
|
18
|
-
def query_publication(publication)
|
19
|
-
end
|
20
|
-
|
21
|
-
# return in-memory Publication instances with associated Holdings
|
22
|
-
def query_publications_by_person(person, ctx = nil)
|
23
|
-
@browser.goto 'http://web.nli.org.il/sites/JPress/Hebrew/Pages/default.aspx'
|
24
|
-
@browser.wait
|
25
|
-
|
26
|
-
t = @browser.text_field(id: 'ctl00_PlaceHolderHeader_PlaceHolderSearchArea_ctl00_ctl00_ctl00_SD01C0892_InputKeywords') # srsly, Micro$oft
|
27
|
-
t.set(person)
|
28
|
-
@browser.a(id: 'ctl00_PlaceHolderHeader_PlaceHolderSearchArea_ctl00_ctl00_ctl00_SD01C0892_go').click # "quick search" - not necessarily by author!
|
29
|
-
@browser.wait
|
30
|
-
ret = []
|
31
|
-
begin
|
32
|
-
|
33
|
-
end
|
34
3
|
end
|
35
4
|
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'rest-client'
|
2
|
+
|
3
|
+
module Gared
|
4
|
+
# Client for the National Library of Israel (NLI) open-library search API.
#
# Instances are built with the search endpoint URL and an API key. The only
# implemented query is #query_publications_by_person; the other query_*
# methods are placeholders that return nil.
class Nli_Api
  # Namespace prepended to bare Dublin Core element names ('title', 'creator', ...).
  DC_NAMESPACE = 'http://purl.org/dc/elements/1.1/'

  # Matches text containing an http(s) URL.
  URL_PATTERN = %r{https?://\S}.freeze

  # @param url [String] search endpoint, e.g. https://api.nli.org.il/openlibrary/search
  # @param api_key [String] key sent as the api_key query parameter
  def initialize(url, api_key)
    @options = {url: url, api_key: api_key}
  end

  # Not implemented; returns nil.
  def query_persons(q)
  end

  # Not implemented; returns nil.
  def query_person(person)
  end

  # Not implemented; returns nil.
  def query_publications(q)
  end

  # Not implemented; returns nil.
  def query_publication(publication)
  end

  # Look up a field of a result record and flatten it to a String.
  #
  # @param record [Hash, nil] one record from the search response
  # @param key [String] bare Dublin Core element name (the DC namespace is
  #   prepended), or a key starting with '@' (used verbatim, e.g. '@id')
  # @return [String] the value itself when it is a String; the elements'
  #   '@value' entries joined with '; ' when it is an Array; '' when the
  #   record is nil, the key is absent, or the value has another type
  def fetch_value_by_dc_key(record, key)
    return '' if record.nil?

    fullkey = key.start_with?('@') ? key : DC_NAMESPACE + key
    flatten_value(record[fullkey])
  end

  # Return in-memory Publication instances with associated Holdings.
  #
  # Runs a count query first, then pages through the results. Errors are
  # reported on stderr and swallowed (best effort): a failing record is
  # skipped, a failing request ends the run with whatever was collected.
  #
  # @param person [String] creator name to search for (Hebrew-language books only)
  # @param ctx passed through to Publication.new
  # @return [Array<Publication>] possibly empty
  def query_publications_by_person(person, ctx = nil)
    ret = []
    begin
      fetch_all_records(escape_query_value(person)).each do |record|
        begin
          ret << build_publication(record, ctx)
        rescue StandardError => e
          # One malformed record must not abort the whole result set.
          warn "Gared::Nli_Api: skipping record: #{e.class}: #{e.message}"
        end
      end
      # TODO: also collect IIIF links for the *subset* of titles that have them, using the availability_type param. No way to get that in the above query -- the fields are not emitted.
      # the URL is like https://api.nli.org.il/openlibrary/search?api_key=(((KEY)))&query=title,contains,querystring&availability_type=online_and_api_access&material_type=books
    rescue StandardError => e
      # StandardError only: signals and SystemExit must still propagate.
      warn "Gared::Nli_Api: query failed: #{e.class}: #{e.message}"
    end
    ret
  end

  private

  # Flatten a raw record value: String as-is, Array joined with '; ', anything else ''.
  def flatten_value(value)
    case value
    when String
      value
    when Array
      # Elements are normally {'@value' => ...} hashes; tolerate plain scalars too.
      value.map { |item| item.is_a?(Hash) ? item['@value'] : item }.join('; ')
    else
      ''
    end
  end

  # Percent-encode text for use inside the query string. URI.escape no longer
  # exists in Ruby 3; spaces are kept as %20 (as URI.escape produced) rather
  # than the '+' that form encoding emits.
  def escape_query_value(text)
    URI.encode_www_form_component(text.to_s).gsub('+', '%20')
  end

  # Build the creator search URL; suffix is the trailing query parameter
  # ('count_mode=true' or 'result_page=N').
  def search_url(escaped_person, suffix)
    "#{@options[:url]}?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=books&#{suffix}"
  end

  # GET the URL and parse the response body as JSON.
  def fetch_json(url)
    # NOTE(review): TLS certificate verification is disabled here, which exposes
    # the API key to man-in-the-middle interception. Kept as-is because it may
    # be a deliberate workaround -- confirm and remove if possible.
    JSON.parse(RestClient::Resource.new(url, verify_ssl: OpenSSL::SSL::VERIFY_NONE).get)
  end

  # Fetch the total count, then load result pages until that many records
  # have been collected or the server stops returning records.
  def fetch_all_records(escaped_person)
    total = fetch_json(search_url(escaped_person, 'count_mode=true')).dig('countInfos', 'total').to_i
    records = []
    result_page = 1
    while records.length < total
      puts "DBG: retrieving results page #{result_page}"
      page = fetch_json(search_url(escaped_person, "result_page=#{result_page}"))
      # An empty or non-array page means no more data; without this guard a
      # count larger than the retrievable records would loop forever.
      break unless page.is_a?(Array) && !page.empty?

      records.concat(page)
      result_page += 1
      # sleep 1 # respect the server and avoid flood-blocking
    end
    records
  end

  # Collect the flattened values of every field that contains an http(s) URL,
  # for clients to be able to determine whether a scanned object exists.
  # Values are read directly because record keys are already fully qualified.
  def collect_additional_urls(record)
    record.each_value.map { |value| flatten_value(value) }.select { |text| text.match?(URL_PATTERN) }
  end

  # Map one result record onto a Publication with a single 'NLI API' Holding.
  def build_publication(record, ctx)
    pub = Publication.new(ctx)
    pub.title = fetch_value_by_dc_key(record, 'title')
    pub.author_line = fetch_value_by_dc_key(record, 'creator')
    pub.language = fetch_value_by_dc_key(record, 'language')
    pub.notes = "#{fetch_value_by_dc_key(record, 'format')}\n#{fetch_value_by_dc_key(record, 'subject')}"
    pub.publisher_line = fetch_value_by_dc_key(record, 'publisher')
    pub.pub_year = fetch_value_by_dc_key(record, 'non_standard_date')
    pub.source_id = fetch_value_by_dc_key(record, '@id')

    urls = collect_additional_urls(record)
    pub.additional_urls = urls unless urls.empty?

    holding = Holding.new
    holding.source_id = pub.source_id
    holding.source_name = 'NLI API'
    holding.location = fetch_value_by_dc_key(record, 'recordid')
    pub.add_holding(holding)
    pub
  end
end
|
89
|
+
end
|
data/test/test_gared.rb
CHANGED
@@ -1,8 +1,24 @@
|
|
1
|
+
require 'minitest/byebug' if ENV['DEBUG']
|
1
2
|
require 'minitest/autorun'
|
2
3
|
require 'gared'
|
3
4
|
|
4
5
|
class GaredTest < Minitest::Test
|
5
6
|
|
7
|
+
# Live integration test against the NLI open-library search API.
# Needs network access and an API key in the NLI_API_KEY environment variable;
# without the key the test is reported as skipped, not as a silent pass
# with zero assertions.
def test_nli_api_query_publicatios_by_person
  skip 'skipping NLI API test because NLI_API_KEY envvar is not set' if ENV['NLI_API_KEY'].nil?

  puts "Testing NLI API"
  nli = Gared::Nli_Api.new('https://api.nli.org.il/openlibrary/search', ENV['NLI_API_KEY'])
  refute_nil nli
  recs = nli.query_publications_by_person('ביאליק')
  #recs = nli.query_publications_by_person('אילנאה')
  refute_nil recs
  refute_empty(recs)
  # The first hit must carry a non-empty title.
  refute_empty(recs[0].title)
end
|
21
|
+
|
6
22
|
def test_primo_query_publicatios_by_person
|
7
23
|
puts "Testing Primo"
|
8
24
|
primo = Gared::Primo.new('http://primo.nli.org.il/PrimoWebServices/xservice/search/brief', 'NNL')
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gared
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.22
|
4
|
+
version: 0.0.23
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Asaf Bartov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-05-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: zoom
|
@@ -93,6 +93,7 @@ files:
|
|
93
93
|
- lib/gared/holding.rb
|
94
94
|
- lib/gared/idea.rb
|
95
95
|
- lib/gared/jpress.rb
|
96
|
+
- lib/gared/nli_api.rb
|
96
97
|
- lib/gared/person.rb
|
97
98
|
- lib/gared/primo.rb
|
98
99
|
- lib/gared/publication.rb
|
@@ -116,8 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
116
117
|
- !ruby/object:Gem::Version
|
117
118
|
version: '0'
|
118
119
|
requirements: []
|
119
|
-
|
120
|
-
rubygems_version: 2.7.7
|
120
|
+
rubygems_version: 3.1.4
|
121
121
|
signing_key:
|
122
122
|
specification_version: 4
|
123
123
|
summary: Scrape Hebrew bibliography sources
|