gared 0.0.22 → 0.0.23

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 12ad7a1d34ae412a0828d5a9ea7297743d631781248f2ecbcfe646e316cfc42e
4
- data.tar.gz: 4c2dab22f6984e50971164a9c984b6034053e0c86a717130afe07b7c743b5fcf
3
+ metadata.gz: cd1e7f535a0650e0251fec57b3572e49dda88b189b96fe1d1b630acc5e147bb4
4
+ data.tar.gz: b606f6169bdf2cac88f5d29cec6bf4e5464a639bc73b655a24c61aecf7593cd1
5
5
  SHA512:
6
- metadata.gz: 3825a5a4bac92b0bb92f3b5fe5955c2a7780a489ceaae7718fc4eead41fa9788b567b2359a8a75efa51c88b8d65623580d27b93e3e29d95f41858ea9b76e66e9
7
- data.tar.gz: 299626204c6f0b3f8cffdb46cc0548c41d7583fc094e3d0455ccead2f2198c4ec9805914036c8c5144874d6487ce980024ce52d016ce3955336351123204a9a2
6
+ metadata.gz: b5c082ab85a8a7fb4cf5c23af7ebc35584751ebcaf88defc1b4f8c8d808d5cb3ff5d213db4b07687bfae49fda11b12f6963c9c6d2bd2e6b1d2e5edcc487e6e4b
7
+ data.tar.gz: a4415463242669077a504252094c2e93f63469fe6fcd4faeeee7a5cceb852b5325608dd5255991c79f04d725856ac61296023301c39af0a4f48288dbe1ddad70
data/lib/gared.rb CHANGED
@@ -7,6 +7,7 @@ module Gared
7
7
  require 'gared/hebrewbooks'
8
8
  require 'gared/idea'
9
9
  require 'gared/googlebooks'
10
+ require 'gared/nli_api'
10
11
  # ...
11
12
 
12
13
  end
data/lib/gared/jpress.rb CHANGED
@@ -1,35 +1,4 @@
1
1
  module Gared
2
2
  class Jpress
3
- require 'watir'
4
-
5
- def initialize
6
- @browser = Watir::Browser.new :chrome, options: {args: ['--no-sandbox', '--headless']}
7
- end
8
-
9
- def query_persons(q)
10
- end
11
-
12
- def query_person(person)
13
- end
14
-
15
- def query_publications(q)
16
- end
17
-
18
- def query_publication(publication)
19
- end
20
-
21
- # return in-memory Publication instances with associated Holdings
22
- def query_publications_by_person(person, ctx = nil)
23
- @browser.goto 'http://web.nli.org.il/sites/JPress/Hebrew/Pages/default.aspx'
24
- @browser.wait
25
-
26
- t = @browser.text_field(id: 'ctl00_PlaceHolderHeader_PlaceHolderSearchArea_ctl00_ctl00_ctl00_SD01C0892_InputKeywords') # srsly, Micro$oft
27
- t.set(person)
28
- @browser.a(id: 'ctl00_PlaceHolderHeader_PlaceHolderSearchArea_ctl00_ctl00_ctl00_SD01C0892_go').click # "quick search" - not necessarily by author!
29
- @browser.wait
30
- ret = []
31
- begin
32
-
33
- end
34
3
  end
35
4
  end
data/lib/gared/nli_api.rb ADDED
@@ -0,0 +1,89 @@
1
+ require 'rest-client'
2
+
3
+ module Gared
4
+ class Nli_Api
5
+ def initialize(url, api_key)
6
+ @options = {url: url, api_key: api_key}
7
+ end
8
+
9
+ def query_persons(q)
10
+ end
11
+
12
+ def query_person(person)
13
+ end
14
+
15
+ def query_publications(q)
16
+ end
17
+
18
+ def query_publication(publication)
19
+ end
20
+ def fetch_value_by_dc_key(record, key)
21
+ ret = ''
22
+ fullkey = key[0] == '@' ? key : 'http://purl.org/dc/elements/1.1/' + key
23
+ unless record.nil?
24
+ unless record[fullkey].nil?
25
+ if record[fullkey].class == String
26
+ ret = record[fullkey ]
27
+ elsif record[fullkey].class == Array
28
+ ret = record[fullkey].map{|x| x['@value'] }.join('; ')
29
+ end
30
+ end
31
+ end
32
+ ret
33
+ end
34
+ # return in-memory Publication instances with associated Holdings
35
+ def query_publications_by_person(person, ctx = nil)
36
+ ret = []
37
+ begin
38
+ # first run obtain counts for the query
39
+ escaped_person = URI.escape(person)
40
+ url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=books&count_mode=true"
41
+ json = JSON.parse(RestClient::Resource.new(url,verify_ssl: OpenSSL::SSL::VERIFY_NONE).get)
42
+ total = json['countInfos']['total']
43
+ # then start loading the results
44
+ result_page = 1
45
+ recs = []
46
+ while recs.length < total
47
+ url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=books&result_page=#{result_page}"
48
+ puts "DBG: retrieving results page #{result_page}"
49
+ json = JSON.parse(RestClient::Resource.new(url,verify_ssl: OpenSSL::SSL::VERIFY_NONE).get)
50
+ recs += json
51
+ result_page += 1
52
+ # sleep 1 # respect the server and avoid flood-blocking
53
+ end
54
+ recs.each do |r|
55
+ begin
56
+ p = Publication.new(ctx)
57
+ p.title = fetch_value_by_dc_key(r, 'title')
58
+ p.author_line = fetch_value_by_dc_key(r, 'creator')
59
+ p.language = fetch_value_by_dc_key(r, 'language')
60
+ p.notes = "#{fetch_value_by_dc_key(r, 'format')}\n#{fetch_value_by_dc_key(r, 'subject')}"
61
+ p.publisher_line = fetch_value_by_dc_key(r,'publisher')
62
+ p.pub_year = fetch_value_by_dc_key(r, 'non_standard_date')
63
+ p.source_id = fetch_value_by_dc_key(r, '@id')
64
+ # collect additional URLS from record, for clients to be able to determine whether a scanned object exists
65
+ additional_urls = []
66
+ r.keys.each do |key|
67
+ val = fetch_value_by_dc_key(r, key)
68
+ additional_urls << val if val =~ /https?:[^\s]\/\//
69
+ end
70
+ p.additional_urls = additional_urls if additional_urls.length > 0
71
+ h = Holding.new
72
+ h.source_id = p.source_id
73
+ h.source_name = 'NLI API'
74
+ h.location = fetch_value_by_dc_key(r, 'recordid')
75
+ p.add_holding(h)
76
+ ret << p
77
+ rescue Exception
78
+ puts $!
79
+ end
80
+ end
81
+ # TODO: also collect IIIF links for the *subset* of titles that have them, using the availability_type param. No way to get that in the above query -- the fields are not emitted.
82
+ # the URL is like https://api.nli.org.il/openlibrary/search?api_key=(((KEY)))&query=title,contains,querystring&availability_type=online_and_api_access&material_type=books
83
+ rescue Exception
84
+ puts $!
85
+ end
86
+ return ret
87
+ end
88
+ end
89
+ end
data/test/test_gared.rb CHANGED
@@ -1,8 +1,24 @@
1
+ require 'minitest/byebug' if ENV['DEBUG']
1
2
  require 'minitest/autorun'
2
3
  require 'gared'
3
4
 
4
5
  class GaredTest < Minitest::Test
5
6
 
7
+ def test_nli_api_query_publicatios_by_person
8
+ if ENV['NLI_API_KEY'].nil?
9
+ puts "skipping NLI API test because NLI_API_KEY envvar is not set"
10
+ return
11
+ end
12
+ puts "Testing NLI API"
13
+ nli = Gared::Nli_Api.new('https://api.nli.org.il/openlibrary/search', ENV['NLI_API_KEY'])
14
+ refute_nil nli
15
+ recs = nli.query_publications_by_person('ביאליק')
16
+ #recs = nli.query_publications_by_person('אילנאה')
17
+ refute_nil recs
18
+ refute_empty(recs)
19
+ refute_empty(recs[0].title)
20
+ end
21
+
6
22
  def test_primo_query_publicatios_by_person
7
23
  puts "Testing Primo"
8
24
  primo = Gared::Primo.new('http://primo.nli.org.il/PrimoWebServices/xservice/search/brief', 'NNL')
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gared
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.22
4
+ version: 0.0.23
5
5
  platform: ruby
6
6
  authors:
7
7
  - Asaf Bartov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-19 00:00:00.000000000 Z
11
+ date: 2021-05-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: zoom
@@ -93,6 +93,7 @@ files:
93
93
  - lib/gared/holding.rb
94
94
  - lib/gared/idea.rb
95
95
  - lib/gared/jpress.rb
96
+ - lib/gared/nli_api.rb
96
97
  - lib/gared/person.rb
97
98
  - lib/gared/primo.rb
98
99
  - lib/gared/publication.rb
@@ -116,8 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
116
117
  - !ruby/object:Gem::Version
117
118
  version: '0'
118
119
  requirements: []
119
- rubyforge_project:
120
- rubygems_version: 2.7.7
120
+ rubygems_version: 3.1.4
121
121
  signing_key:
122
122
  specification_version: 4
123
123
  summary: Scrape Hebrew bibliography sources