gared 0.0.23 → 0.0.27

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cd1e7f535a0650e0251fec57b3572e49dda88b189b96fe1d1b630acc5e147bb4
4
- data.tar.gz: b606f6169bdf2cac88f5d29cec6bf4e5464a639bc73b655a24c61aecf7593cd1
3
+ metadata.gz: e8dcff1820d0b68f25346b0899c3417eb0aa758ef282c65bf008aa0c36e5dd2a
4
+ data.tar.gz: 21277a0b51dbd03c75598e1d5ca37c09875d32e62e668facec2c82f335f3c78c
5
5
  SHA512:
6
- metadata.gz: b5c082ab85a8a7fb4cf5c23af7ebc35584751ebcaf88defc1b4f8c8d808d5cb3ff5d213db4b07687bfae49fda11b12f6963c9c6d2bd2e6b1d2e5edcc487e6e4b
7
- data.tar.gz: a4415463242669077a504252094c2e93f63469fe6fcd4faeeee7a5cceb852b5325608dd5255991c79f04d725856ac61296023301c39af0a4f48288dbe1ddad70
6
+ metadata.gz: d37c27081fb9385373d0336ac2d8d3f270e35c520ae180bfcea85e51f5fe13ae157fe6c2d8c97407890f0a4f899c8f416a757941470e3cc0cb8471e65ce22cca
7
+ data.tar.gz: 86f92c9ca4609bdeb31180bc6e63f46c76f212bc5665b3acd6997510cb91459b5a11356b44be07f707a626d512074f19c466d9cc085caf38f2e8fde0230a2604
data/lib/gared/googlebooks.rb CHANGED
@@ -5,9 +5,13 @@ module Gared
5
5
  def initialize(api_key, page_size = '40')
6
6
  @options = {api_key: api_key, maxResults: page_size}
7
7
  end
8
-
8
+ def uri_escape(s)
9
+ p = URI::Parser.new
10
+ return p.escape(s)
11
+ end
12
+
9
13
  def query_publications_by_person(person, ctx = nil)
10
- url = "https://www.googleapis.com/books/v1/volumes?q=inauthor:#{URI.escape(person)}&filter=full&key=#{@options[:api_key]}&maxResults=#{@options[:maxResults]}"
14
+ url = "https://www.googleapis.com/books/v1/volumes?q=inauthor:#{uri_escape(person)}&filter=full&key=#{@options[:api_key]}&maxResults=#{@options[:maxResults]}"
11
15
  resp = JSON.parse(RestClient.get(url))
12
16
 
13
17
  total = resp['totalItems']
data/lib/gared/nli_api.rb CHANGED
@@ -5,7 +5,11 @@ module Gared
5
5
  def initialize(url, api_key)
6
6
  @options = {url: url, api_key: api_key}
7
7
  end
8
-
8
+ def uri_escape(s)
9
+ p = URI::Parser.new
10
+ return p.escape(s)
11
+ end
12
+
9
13
  def query_persons(q)
10
14
  end
11
15
 
@@ -36,15 +40,15 @@ module Gared
36
40
  ret = []
37
41
  begin
38
42
  # first run obtain counts for the query
39
- escaped_person = URI.escape(person)
40
- url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=books&count_mode=true"
43
+ escaped_person = uri_escape(person)
44
+ url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=book&count_mode=true"
41
45
  json = JSON.parse(RestClient::Resource.new(url,verify_ssl: OpenSSL::SSL::VERIFY_NONE).get)
42
46
  total = json['countInfos']['total']
43
47
  # then start loading the results
44
48
  result_page = 1
45
49
  recs = []
46
50
  while recs.length < total
47
- url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=books&result_page=#{result_page}"
51
+ url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=book&result_page=#{result_page}"
48
52
  puts "DBG: retrieving results page #{result_page}"
49
53
  json = JSON.parse(RestClient::Resource.new(url,verify_ssl: OpenSSL::SSL::VERIFY_NONE).get)
50
54
  recs += json
@@ -79,7 +83,7 @@ module Gared
79
83
  end
80
84
  end
81
85
  # TODO: also collect IIIF links for the *subset* of titles that have them, using the availability_type param. No way to get that in the above query -- the fields are not emitted.
82
- # the URL is like https://api.nli.org.il/openlibrary/search?api_key=(((KEY)))&query=title,contains,querystring&availability_type=online_and_api_access&material_type=books
86
+ # the URL is like https://api.nli.org.il/openlibrary/search?api_key=(((KEY)))&query=title,contains,querystring&availability_type=online_and_api_access&material_type=book
83
87
  rescue Exception
84
88
  puts $!
85
89
  end
data/lib/gared/primo.rb CHANGED
@@ -6,7 +6,11 @@ module Gared
6
6
  def initialize(url, institution)
7
7
  @options = {url: url, institution: institution}
8
8
  end
9
-
9
+ def uri_escape(s)
10
+ p = URI::Parser.new
11
+ return p.escape(s)
12
+ end
13
+
10
14
  def query_persons(q)
11
15
  end
12
16
 
@@ -23,14 +27,14 @@ module Gared
23
27
  def query_publications_by_person(person, ctx = nil)
24
28
  ret = []
25
29
  begin
26
- url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{URI.escape(person)}&indx=1&bulkSize=50&query=facet_rtype,exact,books&json=true"
30
+ url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{uri_escape(person)}&indx=1&bulkSize=50&query=facet_rtype,exact,books&json=true"
27
31
  json = JSON.parse(RestClient.get(url))
28
32
  total = json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['@TOTALHITS'].to_i
29
33
  start_at = 1
30
34
  recs = json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC'] # stash the records
31
35
  while recs.length < total
32
36
  start_at += 50
33
- url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{URI.escape(person)}&indx=#{start_at}&bulkSize=50&query=facet_rtype,exact,books&json=true"
37
+ url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{uri_escape(person)}&indx=#{start_at}&bulkSize=50&query=facet_rtype,exact,books&json=true"
34
38
  json = JSON.parse(RestClient.get(url))
35
39
  recs += json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC']
36
40
  sleep 1 # respect the server and avoid flood-blocking
data/lib/gared.rb CHANGED
@@ -9,5 +9,4 @@ module Gared
9
9
  require 'gared/googlebooks'
10
10
  require 'gared/nli_api'
11
11
  # ...
12
-
13
12
  end
data/test/test_gared.rb CHANGED
@@ -4,44 +4,40 @@ require 'gared'
4
4
 
5
5
  class GaredTest < Minitest::Test
6
6
 
7
- def test_nli_api_query_publicatios_by_person
7
+ def test_nli_api_query_publications_by_person
8
8
  if ENV['NLI_API_KEY'].nil?
9
9
  puts "skipping NLI API test because NLI_API_KEY envvar is not set"
10
10
  return
11
11
  end
12
12
  puts "Testing NLI API"
13
+ byebug
13
14
  nli = Gared::Nli_Api.new('https://api.nli.org.il/openlibrary/search', ENV['NLI_API_KEY'])
14
15
  refute_nil nli
15
- recs = nli.query_publications_by_person('ביאליק')
16
- #recs = nli.query_publications_by_person('אילנאה')
16
+ #recs = nli.query_publications_by_person('ביאליק')
17
+ recs = nli.query_publications_by_person('אילנאה')
17
18
  refute_nil recs
18
19
  refute_empty(recs)
19
20
  refute_empty(recs[0].title)
20
21
  end
21
22
 
22
- def test_primo_query_publicatios_by_person
23
- puts "Testing Primo"
24
- primo = Gared::Primo.new('http://primo.nli.org.il/PrimoWebServices/xservice/search/brief', 'NNL')
25
- refute_nil primo
26
- recs = primo.query_publications_by_person('אילנאה')
27
- refute_nil recs
28
- refute_empty(recs)
29
- refute_empty(recs[0].title)
30
- end
23
+ # temporarily disabled until we find another Primo server to test against
24
+ # def test_primo_query_publications_by_person
25
+ # puts "Testing Primo"
26
+ # primo = Gared::Primo.new('http://primo.nli.org.il/PrimoWebServices/xservice/search/brief', 'NNL')
27
+ # refute_nil primo
28
+ # recs = primo.query_publications_by_person('אילנאה')
29
+ # refute_nil recs
30
+ # refute_empty(recs)
31
+ # refute_empty(recs[0].title)
32
+ # end
31
33
 
32
- def test_aleph_query_publicatios_by_person
33
- puts "Testing Aleph"
34
- aleph = Gared::Aleph.new('aleph.nli.org.il', 9991, 'NNL01')
35
- refute_nil aleph
36
- recs = aleph.query_publications_by_person('אילנאה')
37
- refute_nil recs
38
- refute_empty(recs)
39
- refute_empty(recs[0].title)
40
- end
41
-
42
- def test_googlebooks_query_publicatios_by_person
34
+ def test_googlebooks_query_publications_by_person
35
+ if ENV['GOOGLE_API_KEY'].nil?
36
+ puts "skipping Google Books API test because GOOGLE_API_KEY envvar is not set"
37
+ return
38
+ end
43
39
  puts "Testing Google Books"
44
- gb = Gared::Googlebooks.new('AIzaSyCE2WFqTPdxAz1wv2f33hMfPWIF4tcocgM') # a key I made just for testing this gem. Please do not abuse.
40
+ gb = Gared::Googlebooks.new(ENV['GOOGLE_API_KEY'])
45
41
  refute_nil gb
46
42
  recs = gb.query_publications_by_person('מנדלי')
47
43
  refute_nil recs
@@ -49,7 +45,7 @@ class GaredTest < Minitest::Test
49
45
  refute_empty(recs[0].title)
50
46
  end
51
47
 
52
- def test_hebrewbooks_query_publicatios_by_person
48
+ def test_hebrewbooks_query_publications_by_person
53
49
  skip("Skipping testing Hebrewbooks because chromedriver not found") unless `chromedriver -v` =~ /ChromeDriver/
54
50
  puts "Testing Hebrewbooks"
55
51
  hb = Gared::Hebrewbooks.new
@@ -60,7 +56,7 @@ class GaredTest < Minitest::Test
60
56
  refute_empty(recs[0].title)
61
57
  end
62
58
 
63
- def test_idea_query_publicatios_by_person
59
+ def test_idea_query_publications_by_person
64
60
  skip("Skipping testing IDEA because chromedriver not found") unless `chromedriver -v` =~ /ChromeDriver/
65
61
  puts "Testing IDEA"
66
62
  idea = Gared::Idea.new('http://infocenters.co.il/RAANANA/')
@@ -74,4 +70,4 @@ class GaredTest < Minitest::Test
74
70
  assert_empty(recs)
75
71
  end
76
72
 
77
- end
73
+ end
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gared
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.23
4
+ version: 0.0.27
5
5
  platform: ruby
6
6
  authors:
7
7
  - Asaf Bartov
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-13 00:00:00.000000000 Z
11
+ date: 2023-01-28 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: zoom
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '0.5'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '0.5'
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: watir
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -87,7 +73,6 @@ extensions: []
87
73
  extra_rdoc_files: []
88
74
  files:
89
75
  - lib/gared.rb
90
- - lib/gared/aleph.rb
91
76
  - lib/gared/googlebooks.rb
92
77
  - lib/gared/hebrewbooks.rb
93
78
  - lib/gared/holding.rb
@@ -102,7 +87,7 @@ homepage: https://gitlab.com/abartov/gared
102
87
  licenses:
103
88
  - MIT
104
89
  metadata: {}
105
- post_install_message:
90
+ post_install_message:
106
91
  rdoc_options: []
107
92
  require_paths:
108
93
  - lib
@@ -117,8 +102,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
117
102
  - !ruby/object:Gem::Version
118
103
  version: '0'
119
104
  requirements: []
120
- rubygems_version: 3.1.4
121
- signing_key:
105
+ rubygems_version: 3.2.32
106
+ signing_key:
122
107
  specification_version: 4
123
108
  summary: Scrape Hebrew bibliography sources
124
109
  test_files:
data/lib/gared/aleph.rb DELETED
@@ -1,80 +0,0 @@
1
- # Z39.50 values according to https://www.loc.gov/z3950/agency/bib1.html
2
- # and NLI info according to http://web.nli.org.il/sites/NLI/Hebrew/infochannels/librarians/Pages/Z39.50.aspx
3
- # Name of Database: NNL01
4
- # Host name: aleph.nli.org.il
5
- # IP address: 192.114.7.200
6
- # Port: 9991
7
- # Character-set: UTF-8
8
- # We support the following record syntaxes:
9
- # USMARC, OPAC, XML
10
- # We support the following word searches:
11
- # 1016, 1017, 1,1003, 1004,4,21,30,31,7,12,1007,1031,1007,5028,1033
12
- # We support the following phrase searches:
13
- # 7,12,1,1003,1004,4,21,15
14
- # We support the following sorts:
15
- # 1,4,30,31,1003
16
-
17
- module Gared
18
- require 'zoom'
19
- require 'nokogiri'
20
- class Aleph
21
- def initialize(host, port, database, syntax = 'USMARC')
22
- @options = {host: host, port: port, database: database, syntax: syntax}
23
- end
24
- def query_persons(q)
25
- end
26
-
27
- def query_person(person)
28
- end
29
-
30
- def query_publications(q)
31
- end
32
-
33
- def query_publication(publication)
34
- end
35
-
36
- def query_publications_by_person(person, ctx = nil)
37
- ZOOM::Connection.open(@options[:host], @options[:port]) do |conn|
38
- conn.database_name = @options[:database] # 'aleph.nli.org.il',9991
39
- conn.preferred_record_syntax = @options[:syntax]
40
- rset = conn.search("@attr 1=1003 @attr 2=3 @attr 4=1 \"#{person}\"")
41
- rr = rset.records
42
- return nil if rr.nil? or rr.empty?
43
- ret = []
44
- rr.each do |r|
45
- xml = Nokogiri::Slop(r.xml)
46
- xml.remove_namespaces! # keeps biting me :)
47
- # these scrapes are based on the National Library of Israel usage. No attempt to make it generic. :)
48
- p = Publication.new(ctx)
49
- begin
50
- p.author_line = xml.xpath('//datafield[@tag=\'100\']/subfield[@code=\'a\']')[0].text
51
- # puts "author: #{p.author_line}" # DEBUG
52
- rescue
53
- nil
54
- end
55
- begin
56
- p.title = xml.xpath('//datafield[@tag=\'245\']/subfield[@code=\'a\']')[0].text
57
- # puts "title: #{p.title}" # DEBUG
58
- rescue
59
- nil
60
- end
61
- begin
62
- p.notes = xml.xpath('//datafield[@tag=\'500\']/subfield[@code=\'a\']').collect{|note| note.text}.join("\n")
63
- rescue
64
- nil
65
- end
66
- begin
67
- h = Holding.new
68
- h.source_id = xml.xpath('//datafield[@tag=\'090\']/subfield[@code=\'a\']')[0].text
69
- h.source_name = @options[:database]
70
- p.add_holding(h)
71
- ret << p
72
- rescue
73
- nil # ignore records with no holdings; they may be archival files or other non-publications
74
- end #
75
- end
76
- return ret
77
- end
78
- end
79
- end
80
- end