gared 0.0.18 → 0.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ad89b9fa713cb95d420566a0a9735ad839907661996bfa0e816a87391010e8b6
4
- data.tar.gz: f450165383f2e8c030b4e5e25ade74d5a927009aa2a6c92f174c68e76e5003c1
3
+ metadata.gz: cd1e7f535a0650e0251fec57b3572e49dda88b189b96fe1d1b630acc5e147bb4
4
+ data.tar.gz: b606f6169bdf2cac88f5d29cec6bf4e5464a639bc73b655a24c61aecf7593cd1
5
5
  SHA512:
6
- metadata.gz: e5f35bc4cdb1efe388224d5b0e7973eb40fe7c3e9d241961562b87998e56a058509b9040e94752177b1df3df56ce8aec4617953af7d253bbbfa6e338285c5cba
7
- data.tar.gz: 6b070deefebb23a21d191155bc1e5ff8bf80a60261a74a657486ce2fa44d8244fbeed3ec19a2ae9a196d54649e23a5dced67dace6daaaa2ba1f68c1a548cdf49
6
+ metadata.gz: b5c082ab85a8a7fb4cf5c23af7ebc35584751ebcaf88defc1b4f8c8d808d5cb3ff5d213db4b07687bfae49fda11b12f6963c9c6d2bd2e6b1d2e5edcc487e6e4b
7
+ data.tar.gz: a4415463242669077a504252094c2e93f63469fe6fcd4faeeee7a5cceb852b5325608dd5255991c79f04d725856ac61296023301c39af0a4f48288dbe1ddad70
data/lib/gared.rb CHANGED
@@ -7,6 +7,7 @@ module Gared
7
7
  require 'gared/hebrewbooks'
8
8
  require 'gared/idea'
9
9
  require 'gared/googlebooks'
10
+ require 'gared/nli_api'
10
11
  # ...
11
12
 
12
13
  end
data/lib/gared/holding.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  module Gared
2
2
  class Holding
3
- attr_accessor :source_id, :source_name, :scan_url
3
+ attr_accessor :source_id, :source_name, :scan_url, :location
4
4
 
5
5
  end
6
6
  end
@@ -0,0 +1,89 @@
1
+ require 'rest-client'
2
+
3
module Gared
  # Client for an NLI search API endpoint
  # (e.g. https://api.nli.org.il/openlibrary/search).
  #
  # Records are expected to be hashes keyed by Dublin Core element URIs
  # (http://purl.org/dc/elements/1.1/<name>) plus '@'-prefixed keys such as '@id'.
  class Nli_Api
    # Namespace prepended to short Dublin Core element names.
    DC_NAMESPACE = 'http://purl.org/dc/elements/1.1/'
    # Matches a value that contains an http(s) URL.
    URL_PATTERN = %r{https?://\S+}.freeze

    # @param url [String] base URL of the search endpoint
    # @param api_key [String] API key sent with every request
    def initialize(url, api_key)
      @options = {url: url, api_key: api_key}
    end

    # Not implemented yet; currently returns nil.
    def query_persons(q)
    end

    # Not implemented yet; currently returns nil.
    def query_person(person)
    end

    # Not implemented yet; currently returns nil.
    def query_publications(q)
    end

    # Not implemented yet; currently returns nil.
    def query_publication(publication)
    end

    # Look up a value in a record.
    #
    # @param record [Hash, nil] a single record as parsed from the JSON response
    # @param key [String] a short Dublin Core element name ('title'), an
    #   '@'-prefixed key ('@id'), or an already fully-qualified key containing '://'
    # @return [String] the String value as-is; Array values joined with '; '
    #   (using each element's '@value' when the element is a Hash); '' when the
    #   record is nil, the key is missing, or the value is of another type
    def fetch_value_by_dc_key(record, key)
      return '' if record.nil?

      # Keys taken from the record itself are already qualified; prefixing them
      # again would make every such lookup miss.
      fullkey = key.start_with?('@') || key.include?('://') ? key : DC_NAMESPACE + key
      value = record[fullkey]
      case value
      when String then value
      when Array then value.map { |x| x.is_a?(Hash) ? x['@value'] : x }.join('; ')
      else ''
      end
    end

    # Return in-memory Publication instances with associated Holdings.
    #
    # Issues one count_mode request to learn the total number of hits, then
    # loads result pages until that many records were collected (or the server
    # returns an empty page). Errors are printed and swallowed: a failing
    # record is skipped, a failing request yields an empty result.
    #
    # @param person [String] creator name to search for
    # @param ctx passed through to Publication.new
    # @return [Array<Publication>]
    def query_publications_by_person(person, ctx = nil)
      ret = []
      begin
        base_url = person_query_url(person)
        # first run obtain counts for the query
        total = fetch_json("#{base_url}&count_mode=true").dig('countInfos', 'total').to_i
        # then start loading the results
        fetch_records(base_url, total).each do |r|
          begin
            ret << build_publication(r, ctx)
          rescue StandardError => e
            puts e
          end
        end
        # TODO: also collect IIIF links for the *subset* of titles that have them, using the availability_type param. No way to get that in the above query -- the fields are not emitted.
        # the URL is like https://api.nli.org.il/openlibrary/search?api_key=(((KEY)))&query=title,contains,querystring&availability_type=online_and_api_access&material_type=books
      rescue StandardError => e
        puts e
      end
      ret
    end

    private

    # Build the query URL shared by the count request and the page requests.
    def person_query_url(person)
      # URI.escape no longer exists in Ruby 3.0+, and it left '&', ',' and ';'
      # unescaped, which would corrupt the query. Spaces are sent as %20.
      escaped_person = URI.encode_www_form_component(person).gsub('+', '%20')
      "#{@options[:url]}?api_key=#{@options[:api_key]}" \
        "&query=creator,contains,#{escaped_person},AND;language,exact,heb" \
        '&sort_field=title&material_type=books'
    end

    # GET the URL and parse the response body as JSON.
    def fetch_json(url)
      # NOTE(review): certificate verification is disabled here, as in the
      # original code; the API key travels over this connection -- confirm
      # this is still required.
      JSON.parse(RestClient::Resource.new(url, verify_ssl: OpenSSL::SSL::VERIFY_NONE).get)
    end

    # Load result pages until `total` records were collected.
    def fetch_records(base_url, total)
      recs = []
      result_page = 1
      while recs.length < total
        puts "DBG: retrieving results page #{result_page}"
        page = fetch_json("#{base_url}&result_page=#{result_page}")
        raise TypeError, "unexpected response for result page #{result_page}" unless page.is_a?(Array)
        # An empty page means no more records; without this the loop would
        # never end when the server reports more hits than it returns.
        break if page.empty?

        recs.concat(page)
        result_page += 1
        # sleep 1 # respect the server and avoid flood-blocking
      end
      recs
    end

    # Turn one record into a Publication carrying a single 'NLI API' Holding.
    def build_publication(r, ctx)
      pub = Publication.new(ctx)
      pub.title = fetch_value_by_dc_key(r, 'title')
      pub.author_line = fetch_value_by_dc_key(r, 'creator')
      pub.language = fetch_value_by_dc_key(r, 'language')
      pub.notes = "#{fetch_value_by_dc_key(r, 'format')}\n#{fetch_value_by_dc_key(r, 'subject')}"
      pub.publisher_line = fetch_value_by_dc_key(r, 'publisher')
      pub.pub_year = fetch_value_by_dc_key(r, 'non_standard_date')
      pub.source_id = fetch_value_by_dc_key(r, '@id')
      # collect additional URLs from record, for clients to be able to determine whether a scanned object exists
      additional_urls = r.keys
                         .map { |key| fetch_value_by_dc_key(r, key) }
                         .select { |val| URL_PATTERN.match?(val) }
      pub.additional_urls = additional_urls unless additional_urls.empty?
      holding = Holding.new
      holding.source_id = pub.source_id
      holding.source_name = 'NLI API'
      holding.location = fetch_value_by_dc_key(r, 'recordid')
      pub.add_holding(holding)
      pub
    end
  end
end
data/lib/gared/primo.rb CHANGED
@@ -23,14 +23,14 @@ module Gared
23
23
  def query_publications_by_person(person, ctx = nil)
24
24
  ret = []
25
25
  begin
26
- url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{URI.escape(person)}&indx=1&bulkSize=50&query=facet_rtype,exact,books}&json=true"
26
+ url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{URI.escape(person)}&indx=1&bulkSize=50&query=facet_rtype,exact,books&json=true"
27
27
  json = JSON.parse(RestClient.get(url))
28
28
  total = json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['@TOTALHITS'].to_i
29
29
  start_at = 1
30
30
  recs = json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC'] # stash the records
31
31
  while recs.length < total
32
32
  start_at += 50
33
- url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{URI.escape(person)}&indx=#{start_at}&bulkSize=50&query=facet_rtype,exact,books}&json=true"
33
+ url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{URI.escape(person)}&indx=#{start_at}&bulkSize=50&query=facet_rtype,exact,books&json=true"
34
34
  json = JSON.parse(RestClient.get(url))
35
35
  recs += json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC']
36
36
  sleep 1 # respect the server and avoid flood-blocking
@@ -55,6 +55,8 @@ module Gared
55
55
  h = Holding.new
56
56
  h.source_id = p.source_id
57
57
  h.source_name = 'Primo:'+@options[:institution]
58
+
59
+ h.location = r['LIBRARIES']['LIBRARY'][0].nil? ? r['LIBRARIES']['LIBRARY']['callNumber'] : r['LIBRARIES']['LIBRARY'][0]['callNumber'] # there seem to be two cases, different between NLI and TAU, for example
58
60
  p.add_holding(h)
59
61
  ret << p
60
62
  rescue Exception
@@ -67,4 +69,4 @@ module Gared
67
69
  return ret
68
70
  end
69
71
  end
70
- end
72
+ end
data/test/test_gared.rb CHANGED
@@ -1,8 +1,24 @@
1
+ require 'minitest/byebug' if ENV['DEBUG']
1
2
  require 'minitest/autorun'
2
3
  require 'gared'
3
4
 
4
5
  class GaredTest < Minitest::Test
5
6
 
7
# Live smoke test for Gared::Nli_Api#query_publications_by_person.
# It queries the real NLI endpoint, so it is reported as skipped (rather than
# silently passing) when the NLI_API_KEY environment variable is not set.
def test_nli_api_query_publicatios_by_person
  api_key = ENV['NLI_API_KEY']
  skip "skipping NLI API test because NLI_API_KEY envvar is not set" if api_key.nil?

  puts "Testing NLI API"
  nli = Gared::Nli_Api.new('https://api.nli.org.il/openlibrary/search', api_key)
  refute_nil nli
  recs = nli.query_publications_by_person('ביאליק')
  #recs = nli.query_publications_by_person('אילנאה')
  refute_nil recs
  refute_empty(recs)
  refute_empty(recs[0].title)
end
21
+
6
22
  def test_primo_query_publicatios_by_person
7
23
  puts "Testing Primo"
8
24
  primo = Gared::Primo.new('http://primo.nli.org.il/PrimoWebServices/xservice/search/brief', 'NNL')
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gared
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.18
4
+ version: 0.0.23
5
5
  platform: ruby
6
6
  authors:
7
7
  - Asaf Bartov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-07-10 00:00:00.000000000 Z
11
+ date: 2021-05-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: zoom
@@ -56,14 +56,14 @@ dependencies:
56
56
  name: nokogiri
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - "~>"
59
+ - - ">"
60
60
  - !ruby/object:Gem::Version
61
61
  version: '1.8'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - "~>"
66
+ - - ">"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '1.8'
69
69
  - !ruby/object:Gem::Dependency
@@ -93,6 +93,7 @@ files:
93
93
  - lib/gared/holding.rb
94
94
  - lib/gared/idea.rb
95
95
  - lib/gared/jpress.rb
96
+ - lib/gared/nli_api.rb
96
97
  - lib/gared/person.rb
97
98
  - lib/gared/primo.rb
98
99
  - lib/gared/publication.rb
@@ -116,8 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
116
117
  - !ruby/object:Gem::Version
117
118
  version: '0'
118
119
  requirements: []
119
- rubyforge_project:
120
- rubygems_version: 2.7.7
120
+ rubygems_version: 3.1.4
121
121
  signing_key:
122
122
  specification_version: 4
123
123
  summary: Scrape Hebrew bibliography sources