relaton-cli 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,57 +0,0 @@
1
- require "nokogiri"
2
- require "relaton/cli/base_convertor"
3
-
4
module Relaton
  module Cli
    # Converts a Relaton XML file into YAML.
    #
    # The input is either a single document whose root element is
    # +bibdata+, or a +relaton-collection+ holding several +bibdata+
    # entries. Writing the result is delegated to +convert_and_write+
    # (not defined in this class; presumably inherited from BaseConvertor).
    class XMLConvertorNew < Relaton::Cli::BaseConvertor
      # Convert to YAML
      #
      # This interface allows us to convert any XML file to YAML.
      # It only requires a valid XML file and can do the conversion
      # using default attributes, but it also accepts custom options
      # to customize the conversion process.
      #
      # @param file [String] The complete path to an XML file
      # @param options [Hash] Options as hash key, value pairs.
      #
      def self.to_yaml(file, options = {})
        new(file, options).to_yaml
      end

      # Reads the XML file and hands its parsed content to
      # +convert_and_write+ with the +:to_yaml+ format.
      def to_yaml
        convert_and_write(file_content, :to_yaml)
      end

      private

      # Extension used for the generated file.
      def default_ext
        "yaml"
      end

      # Builds the Hash representation of the parsed XML.
      #
      # @param content [Nokogiri::XML::Document]
      # @return [Hash] the document hash for a +bibdata+ root, otherwise
      #   a hash with a single "root" key holding "title", "author" and
      #   "items"
      def convert_content(content)
        if content.root.name == "bibdata"
          Relaton::Cli.parse_xml(content).to_hash
        else
          # The title and the contributor organization may be absent from
          # a collection; safe navigation yields nil instead of raising
          # NoMethodError on a missing node.
          title = content.at("relaton-collection/title")&.text
          author = content.at("relaton-collection/contributor/organization/name")&.text
          items = content.xpath("//bibdata").map do |bib|
            Relaton::Cli.parse_xml(bib).to_hash
          end

          { "root" => { "title" => title, "author" => author, "items" => items } }
        end
      end

      # Parses the source file as UTF-8 XML and strips all namespaces so
      # the un-prefixed XPath lookups in +convert_content+ match.
      #
      # @return [Nokogiri::XML::Document]
      def file_content
        Nokogiri::XML(File.read(file, encoding: "utf-8")).remove_namespaces!
      end
    end
  end
end
@@ -1,85 +0,0 @@
1
- require "yaml"
2
- require "relaton/cli/base_convertor"
3
- require "relaton_bib"
4
-
5
module Relaton
  module Cli
    # Converts a Relaton YAML file (a single bibliographic item or a
    # collection under a top-level "root" key) into Relaton XML.
    class YAMLConvertorNew < Relaton::Cli::BaseConvertor
      # Convert to XML
      #
      # This interface allows us to convert any YAML file to XML.
      # It only requires a valid YAML file and can do the conversion
      # using default attributes, but it also accepts custom options
      # to customize the conversion process.
      #
      # @param file [String] The complete path to a YAML file
      # @param options [Hash] Options as hash key, value pairs.
      #
      def self.to_xml(file, options = {})
        new(file, options).to_xml
      end

      # When +writable+ is truthy the conversion is handed to
      # +convert_and_write+; otherwise the XML produced by +xml_content+
      # is returned directly.
      def to_xml
        return convert_and_write(file_content, :to_xml) if writable

        xml_content(file)
      end

      private

      # Extension used for the generated file.
      def default_ext
        "rxl"
      end

      # Loads the YAML file and replaces every Date value with its
      # string form.
      #
      # NOTE(review): YAML.load_file deserializes a user-supplied file;
      # depending on the Psych version it may instantiate arbitrary
      # classes or reject Date values. Confirm the supported Ruby/Psych
      # range before switching to an explicit safe_load.
      def file_content
        date_to_string(YAML.load_file(file))
      end

      # Recursively walks Hashes and Arrays, converting Date instances
      # to strings and leaving every other value untouched.
      #
      # @param obj [Object] any value loaded from YAML
      # @return [Object] a new Hash/Array for containers, a String for a
      #   Date, or +obj+ itself
      def date_to_string(obj)
        case obj
        when Hash
          obj.each_with_object({}) do |(key, value), converted|
            converted[key] = date_to_string(value)
          end
        when Array then obj.map { |value| date_to_string(value) }
        when Date then obj.to_s
        else obj
        end
      end

      # Builds a bibliographic item from a single document hash, using
      # the flavor-specific processor registered for its doctype when
      # there is one and the generic RelatonBib converter otherwise.
      #
      # @param content [Hash]
      def convert_single_file(content)
        processor = Relaton::Registry.instance.by_type(doctype(content["docid"]))
        return processor.hash_to_bib(content) if processor

        RelatonBib::BibliographicItem.new(RelatonBib::HashConverter.hash_to_bib(content))
      end

      # Derives the document type from a docid entry: its "type" when
      # present, otherwise the leading word characters of its "id".
      #
      # @param docid [Hash, Array<Hash>, nil] one docid entry or a list
      #   of them (only the first is used)
      # @return [String, nil] nil when no type can be derived
      def doctype(docid)
        # `first` and `[]` return nil for an empty list or a missing
        # key, so an incomplete docid no longer raises
        # IndexError/KeyError and the caller falls back to the generic
        # converter.
        did = docid.is_a?(Array) ? docid.first : docid
        return unless did

        did["type"] || did["id"].to_s[/^\w+/]
      end

      # Converts a collection hash, replacing each entry of
      # root["items"] in place with its converted bibliographic item.
      #
      # @param content [Hash]
      # @return [Relaton::BibcollectionNew, nil] nil when +content+ has
      #   no "root" key, i.e. it is not a collection
      def convert_collection(content)
        return unless content.key?("root")

        root = content["root"]
        # A collection without items is treated as empty.
        root["items"] = (root["items"] || []).map { |item| convert_single_file(item) }
        Relaton::BibcollectionNew.new(root)
      end

      # Renders the converted file content as XML. The argument is
      # ignored; the content is always re-read via +file_content+.
      def xml_content(_raw_file)
        convert_content(file_content).to_xml(date_format: :full, bibdata: true)
      end

      # Tries the collection conversion first and falls back to the
      # single-document conversion.
      def convert_content(content)
        convert_collection(content) || convert_single_file(content)
      end
    end
  end
end
@@ -1,92 +0,0 @@
1
- require "relaton/element_finder"
2
-
3
module Relaton
  # Extracts a flat attribute Hash from a Relaton bibdata XML element.
  #
  # The lookup helpers +find+, +find_text+ and +apply_namespace+ are not
  # defined here; presumably they come from Relaton::ElementFinder.
  class XmlDocument
    include Relaton::ElementFinder

    # Attribute name => XPath (relative to the document element) whose
    # text becomes the attribute value.
    ELEMENTS = {
      title: "./title",
      stage: "./status",
      script: "./script",
      doctype: "./@type",
      edition: "./edition",
      abstract: "./abstract",
      language: "./language",
      uri: "./uri[not(@type)]",
      rxl: "./uri[@type='rxl']",
      xml: "./uri[@type='xml']",
      pdf: "./uri[@type='pdf']",
      doc: "./uri[@type='doc']",
      html: "./uri[@type='html']",
      docidentifier: "./docidentifier",
      copyright_from: "./copyright/from",
      copyright_owner: "./copyright/owner/organization/name",
      technical_committee: "./editorialgroup/technical-committee",
      contributor_author_role: "./contributor/role[@type='author']",
      contributor_publisher_role: "./contributor/role[@type='publisher']",
    }.freeze

    # Shortcut for +new(document).parse+.
    #
    # @param document [String, Object] XML source or an already parsed node
    # @return [Hash]
    def self.parse(document)
      new(document).parse
    end

    # @param document [String, Object] a String is parsed with Nokogiri
    #   and its root element is used; any other value is stored as given
    def initialize(document)
      @document = nokogiri_document(document) || document
    end

    # @return [Hash] the simple text attributes merged with the date and
    #   contributor-organization attributes
    def parse
      base_attributes.merge(complex_attributes)
    end

    private

    attr_reader :document

    # Returns the root element when given an XML String, nil otherwise.
    def nokogiri_document(document)
      Nokogiri::XML(document)&.root if document.is_a?(String)
    end

    # One entry per key of +elements+, holding the text found at its XPath.
    def base_attributes
      elements.map { |key, xpath| [key, find_text(xpath)] }.to_h
    end

    # Date attributes (when a date is present) plus the author and
    # publisher organization names.
    def complex_attributes
      (date_attributes || {}).merge(
        contributor_author_organization: find_organization_for("author"),
        contributor_publisher_organization: find_organization_for("publisher"),
      )
    end

    # Name of the organization attached to the contributor whose role
    # has the given type; nil when any step of the lookup finds nothing.
    def find_organization_for(type)
      find("./contributor/role[@type='#{type}']")
        &.parent
        &.at(apply_namespace("./organization/name"))
        &.text
    end

    def elements
      ELEMENTS
    end

    # Picks the published date, else the circulated date, else any date,
    # and normalizes its value.
    #
    # @return [Hash, nil] +:datetype+ and +:revdate+, or nil when the
    #   document has no date element or the element has no value
    def date_attributes
      revdate =
        find("./date[@type = 'published']") ||
        find("./date[@type = 'circulated']") ||
        find("./date")
      return unless revdate

      # Point dates are stored under <on>, ranged dates under <from>
      # (the previous "./form" lookup was a typo and never matched).
      value = (find_text("./on", revdate) || find_text("./from", revdate))&.strip
      return unless value

      begin
        # Anything longer than "YYYY-MM" goes to the general parser;
        # shorter values are read as year-month.
        date = value.size > 7 ? Date.parse(value) : Date.strptime(value, "%Y-%m")
        { datetype: revdate["type"], revdate: date.to_s }
      rescue ArgumentError
        # Only parsing failures are handled here, so the handler never
        # dereferences a nil node or value; the raw text is kept.
        warn "[relaton] parsing published date '#{revdate.text}' failed."
        { datetype: "circulated", revdate: value }
      end
    end
  end
end