relaton-iso 1.20.0 → 2.0.0.pre.alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/Gemfile +1 -0
- data/README.adoc +134 -130
- data/bin/console +1 -1
- data/grammars/basicdoc.rng +2110 -0
- data/grammars/biblio-standoc.rng +287 -0
- data/grammars/biblio.rng +2097 -0
- data/grammars/relaton-iso-compile.rng +11 -0
- data/grammars/relaton-iso.rng +214 -0
- data/lib/relaton/iso/bibliography.rb +206 -0
- data/lib/relaton/iso/data_fetcher.rb +227 -0
- data/lib/relaton/iso/hash_parser_v1.rb +121 -0
- data/lib/relaton/iso/hit.rb +62 -0
- data/lib/relaton/iso/hit_collection.rb +117 -0
- data/lib/relaton/iso/item_data.rb +49 -0
- data/lib/relaton/iso/model/bibdata.rb +9 -0
- data/lib/relaton/iso/model/bibitem.rb +7 -0
- data/lib/relaton/iso/model/contributor.rb +7 -0
- data/lib/relaton/iso/model/contributor_info.rb +9 -0
- data/lib/relaton/iso/model/docidentifier.rb +128 -0
- data/lib/relaton/iso/model/doctype.rb +13 -0
- data/lib/relaton/iso/model/ext.rb +47 -0
- data/lib/relaton/iso/model/iso_project_group.rb +21 -0
- data/lib/relaton/iso/model/item.rb +17 -0
- data/lib/relaton/iso/model/item_base.rb +19 -0
- data/lib/relaton/iso/model/organization.rb +9 -0
- data/lib/relaton/iso/model/project_number.rb +22 -0
- data/lib/relaton/iso/model/relation.rb +9 -0
- data/lib/relaton/iso/model/stagename.rb +14 -0
- data/lib/relaton/iso/model/structured_identifier.rb +31 -0
- data/lib/relaton/iso/processor.rb +78 -0
- data/lib/relaton/iso/queue.rb +63 -0
- data/lib/relaton/iso/scraper.rb +591 -0
- data/lib/relaton/iso/util.rb +8 -0
- data/lib/relaton/iso/version.rb +7 -0
- data/lib/relaton/iso.rb +17 -0
- data/relaton_iso.gemspec +9 -7
- metadata +76 -46
- data/bin/bundle +0 -109
- data/bin/byebug +0 -27
- data/bin/coderay +0 -27
- data/bin/gdb_wrapper +0 -29
- data/bin/htmldiff +0 -27
- data/bin/httpclient +0 -29
- data/bin/ldiff +0 -27
- data/bin/nokogiri +0 -27
- data/bin/pry +0 -27
- data/bin/pubid-nist +0 -27
- data/bin/racc +0 -27
- data/bin/rackup +0 -29
- data/bin/rake +0 -27
- data/bin/rubocop +0 -27
- data/bin/ruby-parse +0 -27
- data/bin/ruby-rewrite +0 -27
- data/bin/safe_yaml +0 -29
- data/bin/thor +0 -27
- data/lib/relaton_iso/data_fetcher.rb +0 -246
- data/lib/relaton_iso/document_identifier.rb +0 -46
- data/lib/relaton_iso/hash_converter.rb +0 -15
- data/lib/relaton_iso/hit.rb +0 -59
- data/lib/relaton_iso/hit_collection.rb +0 -100
- data/lib/relaton_iso/iso_bibliography.rb +0 -202
- data/lib/relaton_iso/processor.rb +0 -67
- data/lib/relaton_iso/queue.rb +0 -61
- data/lib/relaton_iso/scrapper.rb +0 -553
- data/lib/relaton_iso/util.rb +0 -6
- data/lib/relaton_iso/version.rb +0 -5
- data/lib/relaton_iso.rb +0 -17
@@ -0,0 +1,14 @@
|
|
1
|
+
module Relaton
  module Iso
    #
    # Serializable model of a `stagename` XML element.
    #
    # The element carries an `abbreviation` XML attribute and plain text
    # content; both are exposed as string attributes on the model.
    #
    class Stagename < Lutaml::Model::Serializable
      # Value of the `abbreviation` XML attribute.
      attribute(:abbreviation, :string)
      # Text content of the element.
      attribute(:content, :string)

      xml do
        root("stagename")
        map_attribute("abbreviation", to: :abbreviation)
        map_content(to: :content)
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require_relative "project_number"
|
2
|
+
|
3
|
+
module Relaton
  module Iso
    #
    # Serializable model of a `structuredidentifier` XML element.
    #
    # Holds a `type` XML attribute, a nested `project-number` element
    # (modelled by ProjectNumber) and an integer `tc-document-number` element.
    #
    class StructuredIdentifier < Lutaml::Model::Serializable
      attribute(:type, :string)
      attribute(:project_number, ProjectNumber)
      attribute(:tc_document_number, :integer)

      xml do
        root("structuredidentifier")
        map_attribute("type", to: :type)
        map_element("project-number", to: :project_number)
        map_element("tc-document-number", to: :tc_document_number)
      end

      #
      # Reset `origyr` on the project number, if a project number is set.
      # NOTE(review): `origyr` presumably holds the year part of the
      # identifier - confirm against ProjectNumber.
      #
      # @return [nil]
      #
      def remove_date!
        number = project_number
        number.origyr = nil unless number.nil?
      end

      #
      # Reset `part` on the project number, if a project number is set.
      #
      # @return [nil]
      #
      def remove_part!
        number = project_number
        number.part = nil unless number.nil?
      end

      #
      # Turn the identifier into an "all parts" one by clearing the date
      # first and the part second.
      #
      # @return [nil]
      #
      def to_all_parts!
        remove_date!
        remove_part!
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module Relaton
  module Iso
    #
    # Relaton processor for ISO documents. Heavy dependencies are required
    # lazily inside the methods that need them, so loading this file stays
    # cheap.
    #
    class Processor < Relaton::Core::Processor
      attr_reader :idtype

      # Sets the processor metadata directly; the superclass initializer is
      # intentionally not called (hence the rubocop directive).
      def initialize # rubocop:disable Lint/MissingSuper
        @short = :relaton_iso
        @prefix = "ISO"
        @defaultprefix = %r{^ISO(/IEC)?\s}
        @idtype = "ISO"
        @datasets = %w[iso-ics]
      end

      #
      # Look up a document by its reference code.
      #
      # @param code [String]
      # @param date [String, nil] year
      # @param opts [Hash]
      # @return [Object] whatever `Bibliography.get` returns
      #   NOTE(review): earlier docs named RelatonIsoBib::IsoBibliographicItem,
      #   which this code no longer references - confirm the current type.
      #
      def get(code, date, opts)
        require_relative "../iso"
        Bibliography.get(code, date, opts)
      end

      #
      # Fetch all the documents from https://www.iso.org/standards-catalogue/browse-by-ics.html
      #
      # @param [String] _source source name; ignored by this method (the only
      #   dataset this processor declares is `iso-ics`)
      # @param [Hash] opts forwarded to `DataFetcher.fetch` as keyword arguments
      # @option opts [String] :output directory to output documents
      # @option opts [String] :format output format (xml, yaml, bibxml)
      #
      def fetch_data(_source, opts)
        require_relative "data_fetcher"
        DataFetcher.fetch(**opts)
      end

      #
      # Build an item from its XML serialization.
      #
      # @param xml [String]
      # @return [Object] result of `Item.from_xml`
      #
      def from_xml(xml)
        require_relative "../iso"
        Item.from_xml xml
      end

      #
      # Build an item from its YAML serialization.
      #
      # @param yaml [String]
      # @return [Object] result of `Item.from_yaml`
      #
      def from_yaml(yaml)
        require_relative "../iso"
        Item.from_yaml yaml
      end

      # Disabled legacy converter, kept for reference.
      # @param hash [Hash]
      # @return [RelatonIsoBib::IsoBibliographicItem]
      # def hash_to_bib(hash)
      #   item_hash = HashConverter.hash_to_bib(hash)
      #   ::RelatonIsoBib::IsoBibliographicItem.new(**item_hash)
      # end

      #
      # Returns hash of XML grammar, computed as the MD5 hex digest of the
      # concatenated Relaton::Iso and Relaton::Bib version strings.
      #
      # @return [String]
      #
      def grammar_hash
        # Digest is not loaded by default; require it here like the other
        # lazily loaded dependencies so the method works on its own.
        require "digest/md5"
        require "relaton/bib/version"
        require_relative "version"
        Digest::MD5.hexdigest(Relaton::Iso::VERSION + Relaton::Bib::VERSION)
      end

      #
      # Returns number of workers
      #
      # @return [Integer]
      #
      def threads
        3
      end

      #
      # Remove index file
      #
      def remove_index_file
        require_relative "hit_collection"
        index_file = "#{HitCollection::INDEXFILE}.yaml"
        Relaton::Index.find_or_create(:iso, url: true, file: index_file).remove_file
      end
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
require "forwardable" # needed for `extend Forwardable` below

module Relaton
  module Iso
    #
    # Queue of links to fetch.
    #
    # The queue is an in-memory Array of strings, loaded lazily from FILE
    # (relative to the current working directory) and written back only when
    # #save is called.
    #
    class Queue
      extend Forwardable
      # Expose index access (`queue_instance[i]`) on the underlying array.
      def_delegator :queue, :[]

      FILE = "iso-queue.txt".freeze

      #
      # Open queue file if exist. If not, create new empty queue.
      # The result is memoized, so the file is read at most once per instance.
      #
      # @return [Array<String>] queue
      #
      def queue
        @queue ||= File.exist?(FILE) ? File.read(FILE).split("\n") : []
      end

      #
      # Add item to queue at first position if it is not already there.
      #
      # @param [String] item item to add
      #
      # @return [void]
      #
      def add_first(item)
        queue.unshift item unless queue.include? item
      end

      #
      # Move or add item to the end of the queue.
      #
      # @param [String] item item to move or add
      #
      # @return [void]
      #
      def move_last(item)
        # Drop any existing occurrence first so the item ends up only at the end.
        queue.delete item
        queue << item
      end

      #
      # Take first item from the queue.
      #
      # @return [String] an item
      #
      # def take_first
      #   queue.shift
      # end

      #
      # Save queue to file, one item per line (no trailing newline).
      #
      # @return [void]
      #
      def save
        File.write FILE, queue.to_a.join("\n")
      end
    end
  end
end
|