gared 0.0.22 → 0.0.23
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/gared.rb +1 -0
- data/lib/gared/jpress.rb +0 -31
- data/lib/gared/nli_api.rb +89 -0
- data/test/test_gared.rb +16 -0
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cd1e7f535a0650e0251fec57b3572e49dda88b189b96fe1d1b630acc5e147bb4
|
4
|
+
data.tar.gz: b606f6169bdf2cac88f5d29cec6bf4e5464a639bc73b655a24c61aecf7593cd1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b5c082ab85a8a7fb4cf5c23af7ebc35584751ebcaf88defc1b4f8c8d808d5cb3ff5d213db4b07687bfae49fda11b12f6963c9c6d2bd2e6b1d2e5edcc487e6e4b
|
7
|
+
data.tar.gz: a4415463242669077a504252094c2e93f63469fe6fcd4faeeee7a5cceb852b5325608dd5255991c79f04d725856ac61296023301c39af0a4f48288dbe1ddad70
|
data/lib/gared.rb
CHANGED
data/lib/gared/jpress.rb
CHANGED
@@ -1,35 +1,4 @@
|
|
1
1
|
module Gared
|
2
2
|
class Jpress
|
3
|
-
require 'watir'
|
4
|
-
|
5
|
-
def initialize
|
6
|
-
@browser = Watir::Browser.new :chrome, options: {args: ['--no-sandbox', '--headless']}
|
7
|
-
end
|
8
|
-
|
9
|
-
def query_persons(q)
|
10
|
-
end
|
11
|
-
|
12
|
-
def query_person(person)
|
13
|
-
end
|
14
|
-
|
15
|
-
def query_publications(q)
|
16
|
-
end
|
17
|
-
|
18
|
-
def query_publication(publication)
|
19
|
-
end
|
20
|
-
|
21
|
-
# return in-memory Publication instances with associated Holdings
|
22
|
-
def query_publications_by_person(person, ctx = nil)
|
23
|
-
@browser.goto 'http://web.nli.org.il/sites/JPress/Hebrew/Pages/default.aspx'
|
24
|
-
@browser.wait
|
25
|
-
|
26
|
-
t = @browser.text_field(id: 'ctl00_PlaceHolderHeader_PlaceHolderSearchArea_ctl00_ctl00_ctl00_SD01C0892_InputKeywords') # srsly, Micro$oft
|
27
|
-
t.set(person)
|
28
|
-
@browser.a(id: 'ctl00_PlaceHolderHeader_PlaceHolderSearchArea_ctl00_ctl00_ctl00_SD01C0892_go').click # "quick search" - not necessarily by author!
|
29
|
-
@browser.wait
|
30
|
-
ret = []
|
31
|
-
begin
|
32
|
-
|
33
|
-
end
|
34
3
|
end
|
35
4
|
end
|
data/lib/gared/nli_api.rb
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'rest-client'
|
2
|
+
|
3
|
+
module Gared
|
4
|
+
class Nli_Api
|
5
|
+
def initialize(url, api_key)
|
6
|
+
@options = {url: url, api_key: api_key}
|
7
|
+
end
|
8
|
+
|
9
|
+
def query_persons(q)
|
10
|
+
end
|
11
|
+
|
12
|
+
def query_person(person)
|
13
|
+
end
|
14
|
+
|
15
|
+
def query_publications(q)
|
16
|
+
end
|
17
|
+
|
18
|
+
def query_publication(publication)
|
19
|
+
end
|
20
|
+
def fetch_value_by_dc_key(record, key)
|
21
|
+
ret = ''
|
22
|
+
fullkey = key[0] == '@' ? key : 'http://purl.org/dc/elements/1.1/' + key
|
23
|
+
unless record.nil?
|
24
|
+
unless record[fullkey].nil?
|
25
|
+
if record[fullkey].class == String
|
26
|
+
ret = record[fullkey ]
|
27
|
+
elsif record[fullkey].class == Array
|
28
|
+
ret = record[fullkey].map{|x| x['@value'] }.join('; ')
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
ret
|
33
|
+
end
|
34
|
+
# return in-memory Publication instances with associated Holdings
|
35
|
+
def query_publications_by_person(person, ctx = nil)
|
36
|
+
ret = []
|
37
|
+
begin
|
38
|
+
# first run obtain counts for the query
|
39
|
+
escaped_person = URI.escape(person)
|
40
|
+
url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=books&count_mode=true"
|
41
|
+
json = JSON.parse(RestClient::Resource.new(url,verify_ssl: OpenSSL::SSL::VERIFY_NONE).get)
|
42
|
+
total = json['countInfos']['total']
|
43
|
+
# then start loading the results
|
44
|
+
result_page = 1
|
45
|
+
recs = []
|
46
|
+
while recs.length < total
|
47
|
+
url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=books&result_page=#{result_page}"
|
48
|
+
puts "DBG: retrieving results page #{result_page}"
|
49
|
+
json = JSON.parse(RestClient::Resource.new(url,verify_ssl: OpenSSL::SSL::VERIFY_NONE).get)
|
50
|
+
recs += json
|
51
|
+
result_page += 1
|
52
|
+
# sleep 1 # respect the server and avoid flood-blocking
|
53
|
+
end
|
54
|
+
recs.each do |r|
|
55
|
+
begin
|
56
|
+
p = Publication.new(ctx)
|
57
|
+
p.title = fetch_value_by_dc_key(r, 'title')
|
58
|
+
p.author_line = fetch_value_by_dc_key(r, 'creator')
|
59
|
+
p.language = fetch_value_by_dc_key(r, 'language')
|
60
|
+
p.notes = "#{fetch_value_by_dc_key(r, 'format')}\n#{fetch_value_by_dc_key(r, 'subject')}"
|
61
|
+
p.publisher_line = fetch_value_by_dc_key(r,'publisher')
|
62
|
+
p.pub_year = fetch_value_by_dc_key(r, 'non_standard_date')
|
63
|
+
p.source_id = fetch_value_by_dc_key(r, '@id')
|
64
|
+
# collect additional URLS from record, for clients to be able to determine whether a scanned object exists
|
65
|
+
additional_urls = []
|
66
|
+
r.keys.each do |key|
|
67
|
+
val = fetch_value_by_dc_key(r, key)
|
68
|
+
additional_urls << val if val =~ /https?:[^\s]\/\//
|
69
|
+
end
|
70
|
+
p.additional_urls = additional_urls if additional_urls.length > 0
|
71
|
+
h = Holding.new
|
72
|
+
h.source_id = p.source_id
|
73
|
+
h.source_name = 'NLI API'
|
74
|
+
h.location = fetch_value_by_dc_key(r, 'recordid')
|
75
|
+
p.add_holding(h)
|
76
|
+
ret << p
|
77
|
+
rescue Exception
|
78
|
+
puts $!
|
79
|
+
end
|
80
|
+
end
|
81
|
+
# TODO: also collect IIIF links for the *subset* of titles that have them, using the availability_type param. No way to get that in the above query -- the fields are not emitted.
|
82
|
+
# the URL is like https://api.nli.org.il/openlibrary/search?api_key=(((KEY)))&query=title,contains,querystring&availability_type=online_and_api_access&material_type=books
|
83
|
+
rescue Exception
|
84
|
+
puts $!
|
85
|
+
end
|
86
|
+
return ret
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
data/test/test_gared.rb
CHANGED
@@ -1,8 +1,24 @@
|
|
1
|
+
require 'minitest/byebug' if ENV['DEBUG']
|
1
2
|
require 'minitest/autorun'
|
2
3
|
require 'gared'
|
3
4
|
|
4
5
|
class GaredTest < Minitest::Test
|
5
6
|
|
7
|
+
def test_nli_api_query_publicatios_by_person
|
8
|
+
if ENV['NLI_API_KEY'].nil?
|
9
|
+
puts "skipping NLI API test because NLI_API_KEY envvar is not set"
|
10
|
+
return
|
11
|
+
end
|
12
|
+
puts "Testing NLI API"
|
13
|
+
nli = Gared::Nli_Api.new('https://api.nli.org.il/openlibrary/search', ENV['NLI_API_KEY'])
|
14
|
+
refute_nil nli
|
15
|
+
recs = nli.query_publications_by_person('ביאליק')
|
16
|
+
#recs = nli.query_publications_by_person('אילנאה')
|
17
|
+
refute_nil recs
|
18
|
+
refute_empty(recs)
|
19
|
+
refute_empty(recs[0].title)
|
20
|
+
end
|
21
|
+
|
6
22
|
def test_primo_query_publicatios_by_person
|
7
23
|
puts "Testing Primo"
|
8
24
|
primo = Gared::Primo.new('http://primo.nli.org.il/PrimoWebServices/xservice/search/brief', 'NNL')
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gared
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.22
|
4
|
+
version: 0.0.23
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Asaf Bartov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-05-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: zoom
|
@@ -93,6 +93,7 @@ files:
|
|
93
93
|
- lib/gared/holding.rb
|
94
94
|
- lib/gared/idea.rb
|
95
95
|
- lib/gared/jpress.rb
|
96
|
+
- lib/gared/nli_api.rb
|
96
97
|
- lib/gared/person.rb
|
97
98
|
- lib/gared/primo.rb
|
98
99
|
- lib/gared/publication.rb
|
@@ -116,8 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
116
117
|
- !ruby/object:Gem::Version
|
117
118
|
version: '0'
|
118
119
|
requirements: []
|
119
|
-
|
120
|
-
rubygems_version: 2.7.7
|
120
|
+
rubygems_version: 3.1.4
|
121
121
|
signing_key:
|
122
122
|
specification_version: 4
|
123
123
|
summary: Scrape Hebrew bibliography sources
|