gared 0.0.23 → 0.0.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/gared/googlebooks.rb +6 -2
- data/lib/gared/nli_api.rb +9 -5
- data/lib/gared/primo.rb +7 -3
- data/lib/gared.rb +0 -1
- data/test/test_gared.rb +23 -27
- metadata +6 -21
- data/lib/gared/aleph.rb +0 -80
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e8dcff1820d0b68f25346b0899c3417eb0aa758ef282c65bf008aa0c36e5dd2a
|
4
|
+
data.tar.gz: 21277a0b51dbd03c75598e1d5ca37c09875d32e62e668facec2c82f335f3c78c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d37c27081fb9385373d0336ac2d8d3f270e35c520ae180bfcea85e51f5fe13ae157fe6c2d8c97407890f0a4f899c8f416a757941470e3cc0cb8471e65ce22cca
|
7
|
+
data.tar.gz: 86f92c9ca4609bdeb31180bc6e63f46c76f212bc5665b3acd6997510cb91459b5a11356b44be07f707a626d512074f19c466d9cc085caf38f2e8fde0230a2604
|
data/lib/gared/googlebooks.rb
CHANGED
@@ -5,9 +5,13 @@ module Gared
|
|
5
5
|
def initialize(api_key, page_size = '40')
|
6
6
|
@options = {api_key: api_key, maxResults: page_size}
|
7
7
|
end
|
8
|
-
|
8
|
+
def uri_escape(s)
|
9
|
+
p = URI::Parser.new
|
10
|
+
return p.escape(s)
|
11
|
+
end
|
12
|
+
|
9
13
|
def query_publications_by_person(person, ctx = nil)
|
10
|
-
url = "https://www.googleapis.com/books/v1/volumes?q=inauthor:#{
|
14
|
+
url = "https://www.googleapis.com/books/v1/volumes?q=inauthor:#{uri_escape(person)}&filter=full&key=#{@options[:api_key]}&maxResults=#{@options[:maxResults]}"
|
11
15
|
resp = JSON.parse(RestClient.get(url))
|
12
16
|
|
13
17
|
total = resp['totalItems']
|
data/lib/gared/nli_api.rb
CHANGED
@@ -5,7 +5,11 @@ module Gared
|
|
5
5
|
def initialize(url, api_key)
|
6
6
|
@options = {url: url, api_key: api_key}
|
7
7
|
end
|
8
|
-
|
8
|
+
def uri_escape(s)
|
9
|
+
p = URI::Parser.new
|
10
|
+
return p.escape(s)
|
11
|
+
end
|
12
|
+
|
9
13
|
def query_persons(q)
|
10
14
|
end
|
11
15
|
|
@@ -36,15 +40,15 @@ module Gared
|
|
36
40
|
ret = []
|
37
41
|
begin
|
38
42
|
# first run obtain counts for the query
|
39
|
-
escaped_person =
|
40
|
-
url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=
|
43
|
+
escaped_person = uri_escape(person)
|
44
|
+
url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=book&count_mode=true"
|
41
45
|
json = JSON.parse(RestClient::Resource.new(url,verify_ssl: OpenSSL::SSL::VERIFY_NONE).get)
|
42
46
|
total = json['countInfos']['total']
|
43
47
|
# then start loading the results
|
44
48
|
result_page = 1
|
45
49
|
recs = []
|
46
50
|
while recs.length < total
|
47
|
-
url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=
|
51
|
+
url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=book&result_page=#{result_page}"
|
48
52
|
puts "DBG: retrieving results page #{result_page}"
|
49
53
|
json = JSON.parse(RestClient::Resource.new(url,verify_ssl: OpenSSL::SSL::VERIFY_NONE).get)
|
50
54
|
recs += json
|
@@ -79,7 +83,7 @@ module Gared
|
|
79
83
|
end
|
80
84
|
end
|
81
85
|
# TODO: also collect IIIF links for the *subset* of titles that have them, using the availability_type param. No way to get that in the above query -- the fields are not emitted.
|
82
|
-
# the URL is like https://api.nli.org.il/openlibrary/search?api_key=(((KEY)))&query=title,contains,querystring&availability_type=online_and_api_access&material_type=
|
86
|
+
# the URL is like https://api.nli.org.il/openlibrary/search?api_key=(((KEY)))&query=title,contains,querystring&availability_type=online_and_api_access&material_type=book
|
83
87
|
rescue Exception
|
84
88
|
puts $!
|
85
89
|
end
|
data/lib/gared/primo.rb
CHANGED
@@ -6,7 +6,11 @@ module Gared
|
|
6
6
|
def initialize(url, institution)
|
7
7
|
@options = {url: url, institution: institution}
|
8
8
|
end
|
9
|
-
|
9
|
+
def uri_escape(s)
|
10
|
+
p = URI::Parser.new
|
11
|
+
return p.escape(s)
|
12
|
+
end
|
13
|
+
|
10
14
|
def query_persons(q)
|
11
15
|
end
|
12
16
|
|
@@ -23,14 +27,14 @@ module Gared
|
|
23
27
|
def query_publications_by_person(person, ctx = nil)
|
24
28
|
ret = []
|
25
29
|
begin
|
26
|
-
url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{
|
30
|
+
url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{uri_escape(person)}&indx=1&bulkSize=50&query=facet_rtype,exact,books&json=true"
|
27
31
|
json = JSON.parse(RestClient.get(url))
|
28
32
|
total = json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['@TOTALHITS'].to_i
|
29
33
|
start_at = 1
|
30
34
|
recs = json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC'] # stash the records
|
31
35
|
while recs.length < total
|
32
36
|
start_at += 50
|
33
|
-
url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{
|
37
|
+
url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{uri_escape(person)}&indx=#{start_at}&bulkSize=50&query=facet_rtype,exact,books&json=true"
|
34
38
|
json = JSON.parse(RestClient.get(url))
|
35
39
|
recs += json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC']
|
36
40
|
sleep 1 # respect the server and avoid flood-blocking
|
data/lib/gared.rb
CHANGED
data/test/test_gared.rb
CHANGED
@@ -4,44 +4,40 @@ require 'gared'
|
|
4
4
|
|
5
5
|
class GaredTest < Minitest::Test
|
6
6
|
|
7
|
-
def
|
7
|
+
def test_nli_api_query_publications_by_person
|
8
8
|
if ENV['NLI_API_KEY'].nil?
|
9
9
|
puts "skipping NLI API test because NLI_API_KEY envvar is not set"
|
10
10
|
return
|
11
11
|
end
|
12
12
|
puts "Testing NLI API"
|
13
|
+
byebug
|
13
14
|
nli = Gared::Nli_Api.new('https://api.nli.org.il/openlibrary/search', ENV['NLI_API_KEY'])
|
14
15
|
refute_nil nli
|
15
|
-
recs = nli.query_publications_by_person('ביאליק')
|
16
|
-
|
16
|
+
#recs = nli.query_publications_by_person('ביאליק')
|
17
|
+
recs = nli.query_publications_by_person('אילנאה')
|
17
18
|
refute_nil recs
|
18
19
|
refute_empty(recs)
|
19
20
|
refute_empty(recs[0].title)
|
20
21
|
end
|
21
22
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
refute_empty(recs
|
30
|
-
|
23
|
+
# temporarily disabled until we find another Primo server to test against
|
24
|
+
# def test_primo_query_publications_by_person
|
25
|
+
# puts "Testing Primo"
|
26
|
+
# primo = Gared::Primo.new('http://primo.nli.org.il/PrimoWebServices/xservice/search/brief', 'NNL')
|
27
|
+
# refute_nil primo
|
28
|
+
# recs = primo.query_publications_by_person('אילנאה')
|
29
|
+
# refute_nil recs
|
30
|
+
# refute_empty(recs)
|
31
|
+
# refute_empty(recs[0].title)
|
32
|
+
# end
|
31
33
|
|
32
|
-
def
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
refute_nil recs
|
38
|
-
refute_empty(recs)
|
39
|
-
refute_empty(recs[0].title)
|
40
|
-
end
|
41
|
-
|
42
|
-
def test_googlebooks_query_publicatios_by_person
|
34
|
+
def test_googlebooks_query_publications_by_person
|
35
|
+
if ENV['GOOGLE_API_KEY'].nil?
|
36
|
+
puts "skipping Google Books API test because GOOGLE_API_KEY envvar is not set"
|
37
|
+
return
|
38
|
+
end
|
43
39
|
puts "Testing Google Books"
|
44
|
-
gb = Gared::Googlebooks.new('
|
40
|
+
gb = Gared::Googlebooks.new(ENV['GOOGLE_API_KEY'])
|
45
41
|
refute_nil gb
|
46
42
|
recs = gb.query_publications_by_person('מנדלי')
|
47
43
|
refute_nil recs
|
@@ -49,7 +45,7 @@ class GaredTest < Minitest::Test
|
|
49
45
|
refute_empty(recs[0].title)
|
50
46
|
end
|
51
47
|
|
52
|
-
def
|
48
|
+
def test_hebrewbooks_query_publications_by_person
|
53
49
|
skip("Skipping testing Hebrewbooks because chromedriver not found") unless `chromedriver -v` =~ /ChromeDriver/
|
54
50
|
puts "Testing Hebrewbooks"
|
55
51
|
hb = Gared::Hebrewbooks.new
|
@@ -60,7 +56,7 @@ class GaredTest < Minitest::Test
|
|
60
56
|
refute_empty(recs[0].title)
|
61
57
|
end
|
62
58
|
|
63
|
-
def
|
59
|
+
def test_idea_query_publications_by_person
|
64
60
|
skip("Skipping testing IDEA because chromedriver not found") unless `chromedriver -v` =~ /ChromeDriver/
|
65
61
|
puts "Testing IDEA"
|
66
62
|
idea = Gared::Idea.new('http://infocenters.co.il/RAANANA/')
|
@@ -74,4 +70,4 @@ class GaredTest < Minitest::Test
|
|
74
70
|
assert_empty(recs)
|
75
71
|
end
|
76
72
|
|
77
|
-
end
|
73
|
+
end
|
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gared
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.27
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Asaf Bartov
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-01-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: zoom
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0.5'
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0.5'
|
27
13
|
- !ruby/object:Gem::Dependency
|
28
14
|
name: watir
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -87,7 +73,6 @@ extensions: []
|
|
87
73
|
extra_rdoc_files: []
|
88
74
|
files:
|
89
75
|
- lib/gared.rb
|
90
|
-
- lib/gared/aleph.rb
|
91
76
|
- lib/gared/googlebooks.rb
|
92
77
|
- lib/gared/hebrewbooks.rb
|
93
78
|
- lib/gared/holding.rb
|
@@ -102,7 +87,7 @@ homepage: https://gitlab.com/abartov/gared
|
|
102
87
|
licenses:
|
103
88
|
- MIT
|
104
89
|
metadata: {}
|
105
|
-
post_install_message:
|
90
|
+
post_install_message:
|
106
91
|
rdoc_options: []
|
107
92
|
require_paths:
|
108
93
|
- lib
|
@@ -117,8 +102,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
117
102
|
- !ruby/object:Gem::Version
|
118
103
|
version: '0'
|
119
104
|
requirements: []
|
120
|
-
rubygems_version: 3.
|
121
|
-
signing_key:
|
105
|
+
rubygems_version: 3.2.32
|
106
|
+
signing_key:
|
122
107
|
specification_version: 4
|
123
108
|
summary: Scrape Hebrew bibliography sources
|
124
109
|
test_files:
|
data/lib/gared/aleph.rb
DELETED
@@ -1,80 +0,0 @@
|
|
1
|
-
# Z39.50 values according to https://www.loc.gov/z3950/agency/bib1.html
|
2
|
-
# and NLI info according to http://web.nli.org.il/sites/NLI/Hebrew/infochannels/librarians/Pages/Z39.50.aspx
|
3
|
-
# Name of Database: NNL01
|
4
|
-
# Host name: aleph.nli.org.il
|
5
|
-
# IP address: 192.114.7.200
|
6
|
-
# Port: 9991
|
7
|
-
# Character-set: UTF-8
|
8
|
-
# We support the following record syntaxes:
|
9
|
-
# USMARC, OPAC, XML
|
10
|
-
# We support the following word searches:
|
11
|
-
# 1016, 1017, 1,1003, 1004,4,21,30,31,7,12,1007,1031,1007,5028,1033
|
12
|
-
# We support the following phrase searches:
|
13
|
-
# 7,12,1,1003,1004,4,21,15
|
14
|
-
# We support the following sorts:
|
15
|
-
# 1,4,30,31,1003
|
16
|
-
|
17
|
-
module Gared
|
18
|
-
require 'zoom'
|
19
|
-
require 'nokogiri'
|
20
|
-
class Aleph
|
21
|
-
def initialize(host, port, database, syntax = 'USMARC')
|
22
|
-
@options = {host: host, port: port, database: database, syntax: syntax}
|
23
|
-
end
|
24
|
-
def query_persons(q)
|
25
|
-
end
|
26
|
-
|
27
|
-
def query_person(person)
|
28
|
-
end
|
29
|
-
|
30
|
-
def query_publications(q)
|
31
|
-
end
|
32
|
-
|
33
|
-
def query_publication(publication)
|
34
|
-
end
|
35
|
-
|
36
|
-
def query_publications_by_person(person, ctx = nil)
|
37
|
-
ZOOM::Connection.open(@options[:host], @options[:port]) do |conn|
|
38
|
-
conn.database_name = @options[:database] # 'aleph.nli.org.il',9991
|
39
|
-
conn.preferred_record_syntax = @options[:syntax]
|
40
|
-
rset = conn.search("@attr 1=1003 @attr 2=3 @attr 4=1 \"#{person}\"")
|
41
|
-
rr = rset.records
|
42
|
-
return nil if rr.nil? or rr.empty?
|
43
|
-
ret = []
|
44
|
-
rr.each do |r|
|
45
|
-
xml = Nokogiri::Slop(r.xml)
|
46
|
-
xml.remove_namespaces! # keeps biting me :)
|
47
|
-
# these scrapes are based on the National Library of Israel usage. No attempt to make it generic. :)
|
48
|
-
p = Publication.new(ctx)
|
49
|
-
begin
|
50
|
-
p.author_line = xml.xpath('//datafield[@tag=\'100\']/subfield[@code=\'a\']')[0].text
|
51
|
-
# puts "author: #{p.author_line}" # DEBUG
|
52
|
-
rescue
|
53
|
-
nil
|
54
|
-
end
|
55
|
-
begin
|
56
|
-
p.title = xml.xpath('//datafield[@tag=\'245\']/subfield[@code=\'a\']')[0].text
|
57
|
-
# puts "title: #{p.title}" # DEBUG
|
58
|
-
rescue
|
59
|
-
nil
|
60
|
-
end
|
61
|
-
begin
|
62
|
-
p.notes = xml.xpath('//datafield[@tag=\'500\']/subfield[@code=\'a\']').collect{|note| note.text}.join("\n")
|
63
|
-
rescue
|
64
|
-
nil
|
65
|
-
end
|
66
|
-
begin
|
67
|
-
h = Holding.new
|
68
|
-
h.source_id = xml.xpath('//datafield[@tag=\'090\']/subfield[@code=\'a\']')[0].text
|
69
|
-
h.source_name = @options[:database]
|
70
|
-
p.add_holding(h)
|
71
|
-
ret << p
|
72
|
-
rescue
|
73
|
-
nil # ignore records with no holdings; they may be archival files or other non-publications
|
74
|
-
end #
|
75
|
-
end
|
76
|
-
return ret
|
77
|
-
end
|
78
|
-
end
|
79
|
-
end
|
80
|
-
end
|