gared 0.0.29 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/lib/gared/primo.rb +35 -33
  3. data/test/test_gared.rb +3 -3
  4. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5a4f6f4cbc307c097bf84e4308d7cc76257583ba58f97d0c861ebd79e1e362e6
4
- data.tar.gz: 83aca016e843de3bbdbdfbeecdf1b4b544a376d3205970a810c5076ce3d3466e
3
+ metadata.gz: ae6b9daf5b73b6ff01acc3f098ad45ae0dccac12ea146f6966d58e25c3d8a74e
4
+ data.tar.gz: 19396d2e5e6701a22dd7d27e49491a9bd816a60d48278ca5562843c305f88282
5
5
  SHA512:
6
- metadata.gz: 686d91870739b68c421f68c3a55fd8443dec0f4747118137d0cd256758e67e223c9db5617c7cbd408a4295c32936741172f8865dd96c1303a318841ff4157a41
7
- data.tar.gz: 85336cda8af09b48fa3515d90efb5898dbe28592282c17ce3f352cde96ba459aa50a9cab7c2b32c6d32df4afe52a547d7a999a02a19eb4c4edf28e5beff729fd
6
+ metadata.gz: c37a03e8f88f7c9b7d63761ced83736bfbedf55e3e94aee1c0fa9d53855d6d65a899a0e171d142ae46031dfd177fa3214e2cf8a4eb844f1f82db13b355ada1df
7
+ data.tar.gz: f4dd892a74e7fd1282e200f30ea1ab76bd87ebcd30629ec64a6c8c47a87d2d4eeef67481dd35454fe7f967a46b7a6da23bc3274ee7bf02c3fb87903a99720945
data/lib/gared/primo.rb CHANGED
@@ -3,8 +3,8 @@ require 'rest-client'
3
3
 
4
4
  module Gared
5
5
  class Primo
6
- def initialize(url, institution)
7
- @options = {url: url, institution: institution}
6
+ def initialize(url, vid, scope, apikey)
7
+ @options = {url: url, vid: vid, scope: scope, apikey: apikey}
8
8
  end
9
9
  def uri_escape(s)
10
10
  p = URI::Parser.new
@@ -27,48 +27,50 @@ module Gared
27
27
  def query_publications_by_person(person, ctx = nil)
28
28
  ret = []
29
29
  begin
30
- url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{uri_escape(person)}&indx=1&bulkSize=50&query=facet_rtype,exact,books&json=true"
31
- json = JSON.parse(RestClient.get(url))
32
- total = json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['@TOTALHITS'].to_i
33
- start_at = 1
34
- recs = json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC'] # stash the records
30
+ base_url = "#{@options[:url]}?vid=#{@options[:vid]}&scope=#{@options[:scope]}&q=creator,contains,#{uri_escape(person)}&qInclude=facet_rtype,exact,books&apikey=#{@options[:apikey]}&limit=50"
31
+ json = JSON.parse(RestClient.get(base_url))
32
+ total = json['info']['total'].to_i
33
+ start_at = 0
34
+ recs = json['docs'] # stash the records
35
35
  while recs.length < total
36
36
  start_at += 50
37
- url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{uri_escape(person)}&indx=#{start_at}&bulkSize=50&query=facet_rtype,exact,books&json=true"
37
+ url = base_url+"&offset=#{start_at}"
38
38
  json = JSON.parse(RestClient.get(url))
39
- recs += json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC']
40
- sleep 1 # respect the server and avoid flood-blocking
39
+ recs += json['docs']
40
+ sleep 0.5 # respect the server and avoid flood-blocking
41
41
  end
42
42
  recs.each do |r|
43
43
  begin
44
- deets = r['PrimoNMBib']['record']['display']
44
+ deets = r['pnx']['display'] # the fields inside are now always an array!
45
45
  p = Publication.new(ctx)
46
- p.title = deets['title']
47
- p.author_line = deets['creator']
48
- p.language = deets['language']
49
- p.notes = "#{deets['format']}\n#{deets['subject']}"
50
- p.publisher_line = deets['publisher']
51
- p.pub_year = deets['creationdate']
52
- p.source_id = r['PrimoNMBib']['record']['control']['sourcerecordid']
53
- # collect additional URLS from record, for clients to be able to determine whether a scanned object exists
54
- additional_urls = []
55
- deets.keys.each do |key|
56
- if deets[key].class == Array
57
- deets[key].each do |kkey|
58
- additional_urls << kkey if kkey.class == String && kkey =~ /https?:[^\s]/
59
- end
60
- elsif deets[key].class == String
61
- additional_urls << deets[key] if deets[key] =~ /https?:[^\s]/
62
- end
63
- end
64
- p.additional_urls = additional_urls if additional_urls.length > 0
46
+ p.title = deets['title'].join('; ')
47
+ p.author_line = deets['creator'].join('; ')
48
+ p.author_line += deets['contributor'].join('; ') if deets['contributor']
49
+ p.language = deets['language'].join('; ')
50
+ p.notes = deets['format'].join('; ')+r['pnx']['search']['subject'].join('; ') # "#{deets['format']}\n#{deets['subject']}"
51
+ p.publisher_line = deets['publisher'].join('; ')
52
+ p.pub_year = deets['creationdate'].join('; ')
53
+ p.source_id = r['pnx']['control']['sourcerecordid'].join('; ')
54
+ ## collect additional URLS from record, for clients to be able to determine whether a scanned object exists
55
+ # commented out for now, as it needs to be rewritten for the new Primo output structure
56
+ #additional_urls = []
57
+ #deets.keys.each do |key|
58
+ # if deets[key].class == Array
59
+ # deets[key].each do |kkey|
60
+ # additional_urls << kkey if kkey.class == String && kkey =~ /https?:[^\s]/
61
+ # end
62
+ # elsif deets[key].class == String
63
+ # additional_urls << deets[key] if deets[key] =~ /https?:[^\s]/
64
+ # end
65
+ #end
66
+ #p.additional_urls = additional_urls if additional_urls.length > 0
65
67
  h = Holding.new
66
68
  h.source_id = p.source_id
67
- h.source_name = 'Primo:'+@options[:institution]
69
+ h.source_name = 'Primo:'+@options[:vid]
68
70
 
69
71
  begin
70
- h.location = r['LIBRARIES']['LIBRARY'][0].nil? ? r['LIBRARIES']['LIBRARY']['callNumber'] : r['LIBRARIES']['LIBRARY'][0]['callNumber'] # there seem to be two cases, different between NLI and TAU, for example; there also seems to not even be a LIBRARIES key for some electronic resources. We're skipping those.
71
- rescue Exception
72
+ h.location = r['pnx']['search']['callnumber']
73
+ rescue Exception # We're skipping things without a callnumber, as they are not resources within the library itself.
72
74
  puts $!
73
75
  end
74
76
  p.add_holding(h)
data/test/test_gared.rb CHANGED
@@ -1,6 +1,6 @@
1
- require 'minitest/byebug' if ENV['DEBUG']
2
1
  require 'minitest/autorun'
3
2
  require 'gared'
3
+ require 'minitest/debugger' if ENV['DEBUG']
4
4
 
5
5
  class GaredTest < Minitest::Test
6
6
 
@@ -22,8 +22,8 @@ class GaredTest < Minitest::Test
22
22
 
23
23
  # temporarily disabled until we find another Primo server to test against
24
24
  def test_primo_query_publications_by_person
25
- puts "Testing Primo"
26
- primo = Gared::Primo.new('http://tau-primo.hosted.exlibrisgroup.com/PrimoWebServices/xservice/search/brief', 'TAU')
25
+ puts "Testing Primo (did you remember to put a key in PRIMO_API_KEY?)"
26
+ primo = Gared::Primo.new('https://api-eu.hosted.exlibrisgroup.com/primo/v1/search', 'HAU', 'books_and_more', ENV['PRIMO_API_KEY'])
27
27
  refute_nil primo
28
28
  recs = primo.query_publications_by_person('אילנאה')
29
29
  refute_nil recs
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gared
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.29
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Asaf Bartov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-04-06 00:00:00.000000000 Z
11
+ date: 2024-02-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: watir
@@ -102,7 +102,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
102
102
  - !ruby/object:Gem::Version
103
103
  version: '0'
104
104
  requirements: []
105
- rubygems_version: 3.4.10
105
+ rubygems_version: 3.5.1
106
106
  signing_key:
107
107
  specification_version: 4
108
108
  summary: Scrape Hebrew bibliography sources