gared 0.0.23 → 0.0.27
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/gared/googlebooks.rb +6 -2
- data/lib/gared/nli_api.rb +9 -5
- data/lib/gared/primo.rb +7 -3
- data/lib/gared.rb +0 -1
- data/test/test_gared.rb +23 -27
- metadata +6 -21
- data/lib/gared/aleph.rb +0 -80
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e8dcff1820d0b68f25346b0899c3417eb0aa758ef282c65bf008aa0c36e5dd2a
|
4
|
+
data.tar.gz: 21277a0b51dbd03c75598e1d5ca37c09875d32e62e668facec2c82f335f3c78c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d37c27081fb9385373d0336ac2d8d3f270e35c520ae180bfcea85e51f5fe13ae157fe6c2d8c97407890f0a4f899c8f416a757941470e3cc0cb8471e65ce22cca
|
7
|
+
data.tar.gz: 86f92c9ca4609bdeb31180bc6e63f46c76f212bc5665b3acd6997510cb91459b5a11356b44be07f707a626d512074f19c466d9cc085caf38f2e8fde0230a2604
|
data/lib/gared/googlebooks.rb
CHANGED
@@ -5,9 +5,13 @@ module Gared
|
|
5
5
|
def initialize(api_key, page_size = '40')
|
6
6
|
@options = {api_key: api_key, maxResults: page_size}
|
7
7
|
end
|
8
|
-
|
8
|
+
def uri_escape(s)
|
9
|
+
p = URI::Parser.new
|
10
|
+
return p.escape(s)
|
11
|
+
end
|
12
|
+
|
9
13
|
def query_publications_by_person(person, ctx = nil)
|
10
|
-
url = "https://www.googleapis.com/books/v1/volumes?q=inauthor:#{
|
14
|
+
url = "https://www.googleapis.com/books/v1/volumes?q=inauthor:#{uri_escape(person)}&filter=full&key=#{@options[:api_key]}&maxResults=#{@options[:maxResults]}"
|
11
15
|
resp = JSON.parse(RestClient.get(url))
|
12
16
|
|
13
17
|
total = resp['totalItems']
|
data/lib/gared/nli_api.rb
CHANGED
@@ -5,7 +5,11 @@ module Gared
|
|
5
5
|
def initialize(url, api_key)
|
6
6
|
@options = {url: url, api_key: api_key}
|
7
7
|
end
|
8
|
-
|
8
|
+
def uri_escape(s)
|
9
|
+
p = URI::Parser.new
|
10
|
+
return p.escape(s)
|
11
|
+
end
|
12
|
+
|
9
13
|
def query_persons(q)
|
10
14
|
end
|
11
15
|
|
@@ -36,15 +40,15 @@ module Gared
|
|
36
40
|
ret = []
|
37
41
|
begin
|
38
42
|
# first run obtain counts for the query
|
39
|
-
escaped_person =
|
40
|
-
url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=
|
43
|
+
escaped_person = uri_escape(person)
|
44
|
+
url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=book&count_mode=true"
|
41
45
|
json = JSON.parse(RestClient::Resource.new(url,verify_ssl: OpenSSL::SSL::VERIFY_NONE).get)
|
42
46
|
total = json['countInfos']['total']
|
43
47
|
# then start loading the results
|
44
48
|
result_page = 1
|
45
49
|
recs = []
|
46
50
|
while recs.length < total
|
47
|
-
url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=
|
51
|
+
url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=book&result_page=#{result_page}"
|
48
52
|
puts "DBG: retrieving results page #{result_page}"
|
49
53
|
json = JSON.parse(RestClient::Resource.new(url,verify_ssl: OpenSSL::SSL::VERIFY_NONE).get)
|
50
54
|
recs += json
|
@@ -79,7 +83,7 @@ module Gared
|
|
79
83
|
end
|
80
84
|
end
|
81
85
|
# TODO: also collect IIIF links for the *subset* of titles that have them, using the availability_type param. No way to get that in the above query -- the fields are not emitted.
|
82
|
-
# the URL is like https://api.nli.org.il/openlibrary/search?api_key=(((KEY)))&query=title,contains,querystring&availability_type=online_and_api_access&material_type=
|
86
|
+
# the URL is like https://api.nli.org.il/openlibrary/search?api_key=(((KEY)))&query=title,contains,querystring&availability_type=online_and_api_access&material_type=book
|
83
87
|
rescue Exception
|
84
88
|
puts $!
|
85
89
|
end
|
data/lib/gared/primo.rb
CHANGED
@@ -6,7 +6,11 @@ module Gared
|
|
6
6
|
def initialize(url, institution)
|
7
7
|
@options = {url: url, institution: institution}
|
8
8
|
end
|
9
|
-
|
9
|
+
def uri_escape(s)
|
10
|
+
p = URI::Parser.new
|
11
|
+
return p.escape(s)
|
12
|
+
end
|
13
|
+
|
10
14
|
def query_persons(q)
|
11
15
|
end
|
12
16
|
|
@@ -23,14 +27,14 @@ module Gared
|
|
23
27
|
def query_publications_by_person(person, ctx = nil)
|
24
28
|
ret = []
|
25
29
|
begin
|
26
|
-
url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{
|
30
|
+
url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{uri_escape(person)}&indx=1&bulkSize=50&query=facet_rtype,exact,books&json=true"
|
27
31
|
json = JSON.parse(RestClient.get(url))
|
28
32
|
total = json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['@TOTALHITS'].to_i
|
29
33
|
start_at = 1
|
30
34
|
recs = json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC'] # stash the records
|
31
35
|
while recs.length < total
|
32
36
|
start_at += 50
|
33
|
-
url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{
|
37
|
+
url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{uri_escape(person)}&indx=#{start_at}&bulkSize=50&query=facet_rtype,exact,books&json=true"
|
34
38
|
json = JSON.parse(RestClient.get(url))
|
35
39
|
recs += json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC']
|
36
40
|
sleep 1 # respect the server and avoid flood-blocking
|
data/lib/gared.rb
CHANGED
data/test/test_gared.rb
CHANGED
@@ -4,44 +4,40 @@ require 'gared'
|
|
4
4
|
|
5
5
|
class GaredTest < Minitest::Test
|
6
6
|
|
7
|
-
def
|
7
|
+
def test_nli_api_query_publications_by_person
|
8
8
|
if ENV['NLI_API_KEY'].nil?
|
9
9
|
puts "skipping NLI API test because NLI_API_KEY envvar is not set"
|
10
10
|
return
|
11
11
|
end
|
12
12
|
puts "Testing NLI API"
|
13
|
+
byebug
|
13
14
|
nli = Gared::Nli_Api.new('https://api.nli.org.il/openlibrary/search', ENV['NLI_API_KEY'])
|
14
15
|
refute_nil nli
|
15
|
-
recs = nli.query_publications_by_person('ביאליק')
|
16
|
-
|
16
|
+
#recs = nli.query_publications_by_person('ביאליק')
|
17
|
+
recs = nli.query_publications_by_person('אילנאה')
|
17
18
|
refute_nil recs
|
18
19
|
refute_empty(recs)
|
19
20
|
refute_empty(recs[0].title)
|
20
21
|
end
|
21
22
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
refute_empty(recs
|
30
|
-
|
23
|
+
# temporarily disabled until we find another Primo server to test against
|
24
|
+
# def test_primo_query_publications_by_person
|
25
|
+
# puts "Testing Primo"
|
26
|
+
# primo = Gared::Primo.new('http://primo.nli.org.il/PrimoWebServices/xservice/search/brief', 'NNL')
|
27
|
+
# refute_nil primo
|
28
|
+
# recs = primo.query_publications_by_person('אילנאה')
|
29
|
+
# refute_nil recs
|
30
|
+
# refute_empty(recs)
|
31
|
+
# refute_empty(recs[0].title)
|
32
|
+
# end
|
31
33
|
|
32
|
-
def
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
refute_nil recs
|
38
|
-
refute_empty(recs)
|
39
|
-
refute_empty(recs[0].title)
|
40
|
-
end
|
41
|
-
|
42
|
-
def test_googlebooks_query_publicatios_by_person
|
34
|
+
def test_googlebooks_query_publications_by_person
|
35
|
+
if ENV['GOOGLE_API_KEY'].nil?
|
36
|
+
puts "skipping Google Books API test because GOOGLE_API_KEY envvar is not set"
|
37
|
+
return
|
38
|
+
end
|
43
39
|
puts "Testing Google Books"
|
44
|
-
gb = Gared::Googlebooks.new('
|
40
|
+
gb = Gared::Googlebooks.new(ENV['GOOGLE_API_KEY'])
|
45
41
|
refute_nil gb
|
46
42
|
recs = gb.query_publications_by_person('מנדלי')
|
47
43
|
refute_nil recs
|
@@ -49,7 +45,7 @@ class GaredTest < Minitest::Test
|
|
49
45
|
refute_empty(recs[0].title)
|
50
46
|
end
|
51
47
|
|
52
|
-
def
|
48
|
+
def test_hebrewbooks_query_publications_by_person
|
53
49
|
skip("Skipping testing Hebrewbooks because chromedriver not found") unless `chromedriver -v` =~ /ChromeDriver/
|
54
50
|
puts "Testing Hebrewbooks"
|
55
51
|
hb = Gared::Hebrewbooks.new
|
@@ -60,7 +56,7 @@ class GaredTest < Minitest::Test
|
|
60
56
|
refute_empty(recs[0].title)
|
61
57
|
end
|
62
58
|
|
63
|
-
def
|
59
|
+
def test_idea_query_publications_by_person
|
64
60
|
skip("Skipping testing IDEA because chromedriver not found") unless `chromedriver -v` =~ /ChromeDriver/
|
65
61
|
puts "Testing IDEA"
|
66
62
|
idea = Gared::Idea.new('http://infocenters.co.il/RAANANA/')
|
@@ -74,4 +70,4 @@ class GaredTest < Minitest::Test
|
|
74
70
|
assert_empty(recs)
|
75
71
|
end
|
76
72
|
|
77
|
-
end
|
73
|
+
end
|
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gared
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.23
|
4
|
+
version: 0.0.27
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Asaf Bartov
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-01-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: zoom
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0.5'
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0.5'
|
27
13
|
- !ruby/object:Gem::Dependency
|
28
14
|
name: watir
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -87,7 +73,6 @@ extensions: []
|
|
87
73
|
extra_rdoc_files: []
|
88
74
|
files:
|
89
75
|
- lib/gared.rb
|
90
|
-
- lib/gared/aleph.rb
|
91
76
|
- lib/gared/googlebooks.rb
|
92
77
|
- lib/gared/hebrewbooks.rb
|
93
78
|
- lib/gared/holding.rb
|
@@ -102,7 +87,7 @@ homepage: https://gitlab.com/abartov/gared
|
|
102
87
|
licenses:
|
103
88
|
- MIT
|
104
89
|
metadata: {}
|
105
|
-
post_install_message:
|
90
|
+
post_install_message:
|
106
91
|
rdoc_options: []
|
107
92
|
require_paths:
|
108
93
|
- lib
|
@@ -117,8 +102,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
117
102
|
- !ruby/object:Gem::Version
|
118
103
|
version: '0'
|
119
104
|
requirements: []
|
120
|
-
rubygems_version: 3.
|
121
|
-
signing_key:
|
105
|
+
rubygems_version: 3.2.32
|
106
|
+
signing_key:
|
122
107
|
specification_version: 4
|
123
108
|
summary: Scrape Hebrew bibliography sources
|
124
109
|
test_files:
|
data/lib/gared/aleph.rb
DELETED
@@ -1,80 +0,0 @@
|
|
1
|
-
# Z39.50 values according to https://www.loc.gov/z3950/agency/bib1.html
|
2
|
-
# and NLI info according to http://web.nli.org.il/sites/NLI/Hebrew/infochannels/librarians/Pages/Z39.50.aspx
|
3
|
-
# Name of Database: NNL01
|
4
|
-
# Host name: aleph.nli.org.il
|
5
|
-
# IP address: 192.114.7.200
|
6
|
-
# Port: 9991
|
7
|
-
# Character-set: UTF-8
|
8
|
-
# We support the following record syntaxes:
|
9
|
-
# USMARC, OPAC, XML
|
10
|
-
# We support the following word searches:
|
11
|
-
# 1016, 1017, 1,1003, 1004,4,21,30,31,7,12,1007,1031,1007,5028,1033
|
12
|
-
# We support the following phrase searches:
|
13
|
-
# 7,12,1,1003,1004,4,21,15
|
14
|
-
# We support the following sorts:
|
15
|
-
# 1,4,30,31,1003
|
16
|
-
|
17
|
-
module Gared
|
18
|
-
require 'zoom'
|
19
|
-
require 'nokogiri'
|
20
|
-
class Aleph
|
21
|
-
def initialize(host, port, database, syntax = 'USMARC')
|
22
|
-
@options = {host: host, port: port, database: database, syntax: syntax}
|
23
|
-
end
|
24
|
-
def query_persons(q)
|
25
|
-
end
|
26
|
-
|
27
|
-
def query_person(person)
|
28
|
-
end
|
29
|
-
|
30
|
-
def query_publications(q)
|
31
|
-
end
|
32
|
-
|
33
|
-
def query_publication(publication)
|
34
|
-
end
|
35
|
-
|
36
|
-
def query_publications_by_person(person, ctx = nil)
|
37
|
-
ZOOM::Connection.open(@options[:host], @options[:port]) do |conn|
|
38
|
-
conn.database_name = @options[:database] # 'aleph.nli.org.il',9991
|
39
|
-
conn.preferred_record_syntax = @options[:syntax]
|
40
|
-
rset = conn.search("@attr 1=1003 @attr 2=3 @attr 4=1 \"#{person}\"")
|
41
|
-
rr = rset.records
|
42
|
-
return nil if rr.nil? or rr.empty?
|
43
|
-
ret = []
|
44
|
-
rr.each do |r|
|
45
|
-
xml = Nokogiri::Slop(r.xml)
|
46
|
-
xml.remove_namespaces! # keeps biting me :)
|
47
|
-
# these scrapes are based on the National Library of Israel usage. No attempt to make it generic. :)
|
48
|
-
p = Publication.new(ctx)
|
49
|
-
begin
|
50
|
-
p.author_line = xml.xpath('//datafield[@tag=\'100\']/subfield[@code=\'a\']')[0].text
|
51
|
-
# puts "author: #{p.author_line}" # DEBUG
|
52
|
-
rescue
|
53
|
-
nil
|
54
|
-
end
|
55
|
-
begin
|
56
|
-
p.title = xml.xpath('//datafield[@tag=\'245\']/subfield[@code=\'a\']')[0].text
|
57
|
-
# puts "title: #{p.title}" # DEBUG
|
58
|
-
rescue
|
59
|
-
nil
|
60
|
-
end
|
61
|
-
begin
|
62
|
-
p.notes = xml.xpath('//datafield[@tag=\'500\']/subfield[@code=\'a\']').collect{|note| note.text}.join("\n")
|
63
|
-
rescue
|
64
|
-
nil
|
65
|
-
end
|
66
|
-
begin
|
67
|
-
h = Holding.new
|
68
|
-
h.source_id = xml.xpath('//datafield[@tag=\'090\']/subfield[@code=\'a\']')[0].text
|
69
|
-
h.source_name = @options[:database]
|
70
|
-
p.add_holding(h)
|
71
|
-
ret << p
|
72
|
-
rescue
|
73
|
-
nil # ignore records with no holdings; they may be archival files or other non-publications
|
74
|
-
end #
|
75
|
-
end
|
76
|
-
return ret
|
77
|
-
end
|
78
|
-
end
|
79
|
-
end
|
80
|
-
end
|