relaton-core 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 22d26ddd3ce683c8ec7a757cc3f164863454f9051f35960708cc5490005cda3b
4
- data.tar.gz: f19cec52c8ebaa1bc7ff526f024d43dd59e3dfb6ffd864a2b386c885a796af8e
3
+ metadata.gz: c165688976f3768ae55ababdba7bfdccd0e3fb3114c4793fe4497d74e88970da
4
+ data.tar.gz: 12f685b833874257bf04d87bf2a74375e2c72681a8861712b37c7cc52f513659
5
5
  SHA512:
6
- metadata.gz: 5d568bd089a08098dae0a461f4d3e53c26048755d60a40348438851285d11f0b28966e27003a019816db68453294dffe9609d3bb6b84ee1115cc1e2f86b4f15a
7
- data.tar.gz: 15063d385e121622a436d1cfab3b0918ff8b0c75259467249c2ecc34ed53cd1200fc9666d0f50a5e2beb31d766c4b27d0568419942e02e9041716cf64e09a368
6
+ metadata.gz: 3cceb7dc3f6dbd33cb08851b63b4d78239d61775e1e91fbd14330549693305b575b966d7b8bf4a66397317308638dde39b61e4eaa5ed9567ce3b0466bb618bbb
7
+ data.tar.gz: 6b5d18e51d50c2e1a5d457de051c32b5dcbb7768d809fb796d67efb8c811a0a5b2eed96874ae8f8d12961cbf347a90ca54e695a53c410444f013fda0f231b9a9
@@ -1,6 +1,6 @@
1
1
  module Relaton::Core
2
2
  class DataFetcher
3
- attr_accessor :docs
3
+ # attr_accessor :docs
4
4
  #
5
5
  # Initialize fetcher
6
6
  #
@@ -11,14 +11,9 @@ module Relaton::Core
11
11
  @output = output
12
12
  @format = format
13
13
  @ext = format.sub "bibxml", "xml"
14
- @files = []
15
- @docs = []
16
- end
17
-
18
- def index
19
- @index ||= Relaton::Index.find_or_create self.class::INDEX_TYPE,
20
- file: self.class::INDEX_FILE,
21
- pubid_class: self.class.get_identifier_class
14
+ @files = Set.new
15
+ # @docs = []
16
+ @errors = Hash.new(true)
22
17
  end
23
18
 
24
19
  # API method for external service
@@ -32,58 +27,35 @@ module Relaton::Core
32
27
  puts "Done in: #{(t2 - t1).round} sec."
33
28
  end
34
29
 
35
- def self.get_identifier_class
36
- raise NotImplementedError, "#{self.class}#get_identifier_class method must be implemented"
37
- end
38
-
39
30
  def fetch
40
- fetch_docs ACTIVE_PUBS_URL
41
- fetch_docs OBSOLETE_PUBS_URL, retired: true
42
- index.save
31
+ raise NotImplementedError, "#{self.class}#fetch method must be implemented"
43
32
  end
44
33
 
45
- # Parse hash and return RelatonBib
46
- # @param [Hash] doc document data
47
- # @return [RelatonBib]
48
- def parse(doc)
49
- raise NotImplementedError, "#{self.class}#parse method must be implemented"
34
+ def gh_issue
35
+ return @gh_issue if defined? @gh_issue
36
+
37
+ @gh_issue = Relaton::Logger::Channels::GhIssue.new(*gh_issue_channel)
38
+ Relaton.logger_pool[:gh_issue] = Relaton::Logger::Log.new(@gh_issue, levels: [:error])
39
+ @gh_issue
50
40
  end
51
41
 
52
- # @param [RelatonBib::BibliographicItem] bib
53
- # @return [String] filename based on PubID identifier
54
- def get_output_file(bib)
55
- File.join @output, "#{bib.docidentifier.first.id.gsub(/[.\s-]+/, '-')}.#{@ext}"
42
+ def gh_issue_channel
43
+ raise NotImplementedError, "#{self.class}#gh_issue_channel method must be implemented"
56
44
  end
57
45
 
58
- #
59
- # Parse document and save to file
60
- #
61
- # @param [Hash] doc document data
62
- # @param [Boolean] retired if true then document is retired
63
- #
64
- # @return [void]
65
- #
66
- def parse_and_save(doc)
67
- bibitem = parse(doc)
68
- save_bib(bibitem)
69
- index_add_or_update(bibitem)
46
+ def repot_errors
47
+ @errors.select { |_, v| v }.each_key { |k| log_error "Failed to fetch #{k}" }
48
+ gh_issue.create_issue
70
49
  end
71
50
 
72
- #
73
- # Save bibitem to file
74
- #
75
- # @param [RelatonBib::BibliographicItem] bib bibitem
76
- #
77
- # @return [void]
78
- #
79
- def save_bib(bib)
80
- file = get_output_file(bib)
81
- File.write file, serialize(bib), encoding: "UTF-8"
51
+ def log_error(_msg)
52
+ raise NoMatchingPatternError, "#{self.class}#log_error method must be implemented"
82
53
  end
83
54
 
84
- def index_add_or_update(bib)
85
- index.add_or_update self.class.get_identifier_class.parse(bib.docidentifier.first.id),
86
- get_output_file(bib)
55
+ # @param docid [String] document ID
56
+ # @return [String] filename based on PubID identifier
57
+ def output_file(docid)
58
+ File.join @output, "#{docid.downcase.gsub(/[.\s\/-]+/, '-')}.#{@ext}"
87
59
  end
88
60
 
89
61
  #
@@ -94,11 +66,19 @@ module Relaton::Core
94
66
  # @return [String] serialized bibliographic item
95
67
  #
96
68
  def serialize(bib)
97
- case @format
98
- when "yaml" then bib.to_hash.to_yaml
99
- when "xml" then bib.to_xml(bibdata: true)
100
- else bib.send "to_#{@format}"
101
- end
69
+ send "to_#{@format}", bib
70
+ end
71
+
72
+ def to_yaml(bib)
73
+ raise NotImplementedError, "#{self.class}#to_yaml method must be implemented"
74
+ end
75
+
76
+ def to_xml(bib)
77
+ raise NotImplementedError, "#{self.class}#to_xml method must be implemented"
78
+ end
79
+
80
+ def to_bibxml(bib)
81
+ raise NotImplementedError, "#{self.class}#to_bibxml method must be implemented"
102
82
  end
103
83
  end
104
84
  end
@@ -0,0 +1,55 @@
1
+ require "weakref"
2
+
3
+ module Relaton
4
+ module Core
5
+ class Hit
6
+ # @return [RelatonBib::HitCollection]
7
+ attr_accessor :hit_collection
8
+
9
+ # @return [Hash]
10
+ attr_reader :hit
11
+
12
+ # @param hit [Hash]
13
+ # @param hit_collection [RelatonBib::HitCollection]
14
+ def initialize(hit, hit_collection = nil)
15
+ @hit = hit
16
+ @hit_collection = WeakRef.new hit_collection if hit_collection
17
+ end
18
+
19
+ # @return [String]
20
+ def to_s
21
+ inspect
22
+ end
23
+
24
+ # @return [String]
25
+ def inspect
26
+ "<#{self.class}:#{format('%<id>#.14x', id: object_id << 1)} " \
27
+ "@text=\"#{@hit_collection&.text}\" " \
28
+ "@fetched=\"#{!@fetch.nil?}\" " \
29
+ "@fullIdentifier=\"#{@fetch&.shortref(nil)}\" " \
30
+ "@title=\"#{@hit[:code]}\">"
31
+ end
32
+
33
+ # @return [RelatonBib::ItemData]
34
+ def item
35
+ raise "Not implemented"
36
+ end
37
+
38
+ # @param opts [Hash]
39
+ # @option opts [Nokogiri::XML::Builder] :builder XML builder
40
+ # @option opts [Boolean] :bibdata
41
+ # @option opts [String, Symbol] :lang language
42
+ # @return [String] XML
43
+ # def to_xml(**opts)
44
+ # if opts[:builder]
45
+ # fetch.to_xml(**opts)
46
+ # else
47
+ # builder = Nokogiri::XML::Builder.new(encoding: "UTF-8") do |xml|
48
+ # fetch.to_xml(**opts.merge(builder: xml))
49
+ # end
50
+ # builder.doc.root.to_xml
51
+ # end
52
+ # end
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,106 @@
1
+ require "forwardable"
2
+ require_relative "hit"
3
+
4
+ module Relaton
5
+ module Core
6
+ class HitCollection
7
+ extend Forwardable
8
+
9
+ def_delegators :@array, :<<, :[], :first, :empty?, :any?, :size, :each, :each_slice, :reduce, :map
10
+
11
+ # @return [TrueClass, FalseClass]
12
+ attr_reader :fetched
13
+
14
+ # @return [String]
15
+ attr_reader :ref
16
+
17
+ # @return [String]
18
+ attr_reader :year
19
+
20
+ #
21
+ # @param ref [String, Pubid] reference to search
22
+ # @param year [String, nil] year of publication
23
+ #
24
+ def initialize(ref, year = nil)
25
+ @array = []
26
+ @ref = ref
27
+ @year = year
28
+ @fetched = false
29
+ end
30
+
31
+ #
32
+ # Fetches hits from the data source
33
+ #
34
+ # @return [self] self object
35
+ #
36
+ def fetch
37
+ workers = WorkersPool.new 4
38
+ workers.worker(&:item)
39
+ each do |hit|
40
+ workers << hit
41
+ end
42
+ workers.end
43
+ workers.result
44
+ @fetched = true
45
+ self
46
+ end
47
+
48
+ #
49
+ # Renders the collection as XML
50
+ #
51
+ # @param opts [Hash] options
52
+ # @option opts [Nokogiri::XML::Builder] :builder XML builder
53
+ # @option opts [Boolean] :bibdata render bibdata if true
54
+ # @option opts [String, Symbol] :lang language
55
+ #
56
+ # @return [String] XML representation of the collection
57
+ #
58
+ # def to_xml(**opts)
59
+ # builder = Nokogiri::XML::Builder.new(encoding: "UTF-8") do |xml|
60
+ # xml.documents do
61
+ # @array.each do |hit|
62
+ # hit.fetch
63
+ # hit.to_xml(**opts.merge(builder: xml))
64
+ # end
65
+ # end
66
+ # end
67
+ # builder.to_xml
68
+ # end
69
+
70
+ #
71
+ # Keeps only the matching hits (filtering in place) and returns the collection itself
72
+ #
73
+ # @param [Proc] &block proc to select hits
74
+ #
75
+ # @return [self] the same hit collection, filtered in place
76
+ #
77
+ def select!(&block)
78
+ @array.select!(&block)
79
+ self
80
+ end
81
+
82
+ def reduce!(sum, &block)
83
+ @array = @array.reduce sum, &block
84
+ self
85
+ end
86
+
87
+ #
88
+ # Returns String representation of the collection
89
+ #
90
+ # @return [String] String representation of the collection
91
+ #
92
+ def to_s
93
+ inspect
94
+ end
95
+
96
+ #
97
+ # Returns String representation of the collection
98
+ #
99
+ # @return [String] String representation of the collection
100
+ #
101
+ def inspect
102
+ "<#{self.class}:#{format('%#.14x', object_id << 1)} @ref=#{@ref} @fetched=#{@fetched}>"
103
+ end
104
+ end
105
+ end
106
+ end
@@ -0,0 +1,48 @@
1
+ module Relaton
2
+ module Core
3
+ class Processor
4
+ # @return [Symbol]
5
+ attr_reader :short
6
+
7
+ # @return [String]
8
+ attr_reader :prefix, :idtype
9
+
10
+ # @return [Regexp]
11
+ attr_reader :defaultprefix
12
+
13
+ # @return [Array<String>]
14
+ attr_reader :datasets
15
+
16
+ def initialize
17
+ raise "This is an abstract class!"
18
+ end
19
+
20
+ def get(_code, _date, _opts)
21
+ raise "This is an abstract class!"
22
+ end
23
+
24
+ def fetch_data(_source, _opts)
25
+ raise "This is an abstract class!"
26
+ end
27
+
28
+ def from_xml(_xml)
29
+ raise "This is an abstract class!"
30
+ end
31
+
32
+ def from_yaml(_hash)
33
+ raise "This is an abstract class!"
34
+ end
35
+
36
+ def grammar_hash
37
+ raise "This is an abstract class!"
38
+ end
39
+
40
+ # Returns the default number of workers. Should be overridden by child classes if needed.
41
+ #
42
+ # @return [Integer] number of workers
43
+ def threads
44
+ 10
45
+ end
46
+ end
47
+ end
48
+ end
@@ -1,5 +1,5 @@
1
1
  module Relaton
2
2
  module Core
3
- VERSION = "0.0.3".freeze
3
+ VERSION = "0.0.4".freeze
4
4
  end
5
5
  end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Relaton
4
+ module Core
5
+ # Workers pool.
6
+ class WorkersPool
7
+ attr_accessor :nb_hits
8
+
9
+ def initialize(num_workers = 2)
10
+ @num_workers = num_workers < 2 ? 2 : num_workers
11
+ @queue = SizedQueue.new(num_workers * 2)
12
+ @result = []
13
+ @nb_hits = 0
14
+ end
15
+
16
+ def worker(&block)
17
+ @threads = Array.new @num_workers do
18
+ Thread.new do
19
+ until (item = @queue.pop) == :END
20
+ @result << yield(item) if block
21
+ end
22
+ end
23
+ end
24
+ end
25
+
26
+ def result
27
+ @threads.each(&:join)
28
+ @result
29
+ end
30
+
31
+ def <<(item)
32
+ @queue << item
33
+ self
34
+ end
35
+
36
+ def end
37
+ @num_workers.times { @queue << :END }
38
+ end
39
+
40
+ def size
41
+ @result.size
42
+ end
43
+ end
44
+ end
45
+ end
data/lib/relaton/core.rb CHANGED
@@ -1,3 +1,6 @@
1
- require "relaton_bib"
2
- require "relaton/index"
1
+ require "relaton/logger"
2
+ require_relative "core/version"
3
+ require_relative "core/processor"
3
4
  require_relative "core/data_fetcher"
5
+ require_relative "core/hit_collection"
6
+ require_relative "core/workers_pool"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-01-17 00:00:00.000000000 Z
11
+ date: 2025-04-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -39,47 +39,33 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '3.0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: relaton-bib
42
+ name: simplecov
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 1.20.0
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - "~>"
53
- - !ruby/object:Gem::Version
54
- version: 1.20.0
55
- - !ruby/object:Gem::Dependency
56
- name: relaton-index
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - "~>"
60
- - !ruby/object:Gem::Version
61
- version: 0.2.16
62
- type: :runtime
47
+ version: 0.21.2
48
+ type: :development
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
52
  - - "~>"
67
53
  - !ruby/object:Gem::Version
68
- version: 0.2.16
54
+ version: 0.21.2
69
55
  - !ruby/object:Gem::Dependency
70
- name: pubid-core
56
+ name: relaton-logger
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
59
  - - "~>"
74
60
  - !ruby/object:Gem::Version
75
- version: 1.12.10
61
+ version: 0.2.0
76
62
  type: :runtime
77
63
  prerelease: false
78
64
  version_requirements: !ruby/object:Gem::Requirement
79
65
  requirements:
80
66
  - - "~>"
81
67
  - !ruby/object:Gem::Version
82
- version: 1.12.10
68
+ version: 0.2.0
83
69
  description: Library for importing and caching bibliographic references to technical
84
70
  standards
85
71
  email:
@@ -95,7 +81,11 @@ files:
95
81
  - lib/relaton-core.rb
96
82
  - lib/relaton/core.rb
97
83
  - lib/relaton/core/data_fetcher.rb
84
+ - lib/relaton/core/hit.rb
85
+ - lib/relaton/core/hit_collection.rb
86
+ - lib/relaton/core/processor.rb
98
87
  - lib/relaton/core/version.rb
88
+ - lib/relaton/core/workers_pool.rb
99
89
  homepage: https://github.com/relaton/relaton-core
100
90
  licenses:
101
91
  - BSD-2-Clause
@@ -108,14 +98,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
108
98
  requirements:
109
99
  - - ">="
110
100
  - !ruby/object:Gem::Version
111
- version: 2.5.0
101
+ version: 3.1.0
112
102
  required_rubygems_version: !ruby/object:Gem::Requirement
113
103
  requirements:
114
104
  - - ">="
115
105
  - !ruby/object:Gem::Version
116
106
  version: '0'
117
107
  requirements: []
118
- rubygems_version: 3.3.27
108
+ rubygems_version: 3.5.22
119
109
  signing_key:
120
110
  specification_version: 4
121
111
  summary: Library for importing and caching bibliographic references to technical standards