relaton-cli 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,9 @@
1
+ require "date"
1
2
 
2
3
  module Relaton
3
4
  class Bibdata
4
5
  ATTRIBS = %i[
5
- docid
6
+ docidentifier
6
7
  doctype
7
8
  title
8
9
  stage
@@ -16,6 +17,16 @@ module Relaton
16
17
  revdate
17
18
  abstract
18
19
  technical_committee
20
+ copyright_from
21
+ copyright_owner
22
+ contributor_author_role
23
+ contributor_author_organization
24
+ contributor_publisher_role
25
+ contributor_publisher_organization
26
+ language
27
+ script
28
+ edition
29
+ datetype
19
30
  ]
20
31
 
21
32
  attr_accessor *ATTRIBS
@@ -31,68 +42,120 @@ module Relaton
31
42
  options.each_pair do |k,v|
32
43
  send("#{k.to_s}=", v)
33
44
  end
45
+ self
46
+ end
34
47
 
35
- puts "*+"*30
36
- puts self.inspect
48
+ # From http://gavinmiller.io/2016/creating-a-secure-sanitization-function/
49
+ FILENAME_BAD_CHARS = [ '/', '\\', '?', '%', '*', ':', '|', '"', '<', '>', '.', ' ' ]
37
50
 
38
- self
51
+ def docidentifier_code
52
+ return "" if docidentifier.nil?
53
+ a = FILENAME_BAD_CHARS.inject(docidentifier.downcase) do |result, bad_char|
54
+ result.gsub(bad_char, '-')
55
+ end
39
56
  end
40
57
 
41
- def docid_code
42
- docid.downcase.gsub(/[\s\/]/, "-") || ""
58
+ DOC_NUMBER_REGEX = /([\w\/]+)\s+(\d+):?(\d*)/
59
+ def doc_number
60
+ docidentifier&.match(DOC_NUMBER_REGEX) ? $2.to_i : 999999
43
61
  end
44
62
 
45
63
  def self.from_xml(source)
46
64
 
47
65
  # bib.relaton_xml_path = URI.escape("#{relaton_root}/#{id_code}.xml")
48
-
49
- datetype = source.at(ns("./date[@type]")).text
50
- revdate = source.at(ns("./date/on")).text
66
+ revdate = source.at(ns("./date[@type = 'published']")) ||
67
+ source.at(ns("./date[@type = 'circulated']")) || source.at(ns("./date"))
68
+ datetype = "circulated"
69
+ datetype = revdate["type"] if revdate
51
70
 
52
71
  new({
53
- uri: source.at(ns("./uri"))&.text,
72
+ uri: source.at(ns("./uri[not(@type)]"))&.text,
54
73
  xml: source.at(ns("./uri[@type='xml']"))&.text,
55
74
  pdf: source.at(ns("./uri[@type='pdf']"))&.text,
56
75
  html: source.at(ns("./uri[@type='html']"))&.text,
57
76
  relaton: source.at(ns("./uri[@type='relaton']"))&.text,
58
77
  doc: source.at(ns("./uri[@type='doc']"))&.text,
59
- docid: source.at(ns("./docidentifier"))&.text,
78
+ docidentifier: source.at(ns("./docidentifier"))&.text,
60
79
  title: source.at(ns("./title"))&.text,
61
80
  doctype: source.at(ns("./@type"))&.text,
62
81
  stage: source.at(ns("./status"))&.text,
63
- technical_committee: source.at(ns("./technical-committee"))&.text,
82
+ technical_committee: source.at(ns("./editorialgroup/technical-committee"))&.text,
64
83
  abstract: source.at(ns("./abstract"))&.text,
65
- revdate: Date.parse(revdate)
66
- # revdate TODO
84
+ revdate: revdate ? Date.parse(revdate.text) : nil,
85
+ language: source.at(ns("./language"))&.text,
86
+ script: source.at(ns("./script"))&.text,
87
+ edition: source.at(ns("./edition"))&.text,
88
+ copyright_from: source.at(ns("./copyright/from"))&.text,
89
+ copyright_owner: source.at(ns("./copyright/owner/organization/name"))&.text,
90
+ contributor_author_role: source.at(ns("./contributor/role[@type='author']"))&.text,
91
+ contributor_author_organization: source.at(ns("./contributor/role[@type='author']"))&.parent&.at(ns("./organization/name"))&.text,
92
+ contributor_publisher_role: source.at(ns("./contributor/role[@type='publisher']"))&.text,
93
+ contributor_publisher_organization: source.at(ns("./contributor/role[@type='publisher']"))&.parent&.at(ns("./organization/name"))&.text,
94
+ datetype: datetype
67
95
  })
68
96
  end
69
97
 
70
98
  def to_xml
71
- datetype = stage.casecmp("published") == 0 ? "published" : "updated"
99
+ #datetype = stage&.casecmp("published") == 0 ? "published" : "circulated"
72
100
 
73
101
  ret = "<bibdata type='#{doctype}'>\n"
102
+ ret += "<fetched>#{Date.today.to_s}</fetched>\n"
74
103
  ret += "<title>#{title}</title>\n"
104
+ ret += "<docidentifier>#{docidentifier}</docidentifier>\n" if docidentifier
75
105
  ret += "<uri>#{uri}</uri>\n" if uri
76
106
  ret += "<uri type='xml'>#{xml}</uri>\n" if xml
77
107
  ret += "<uri type='html'>#{html}</uri>\n" if html
78
108
  ret += "<uri type='pdf'>#{pdf}</uri>\n" if pdf
79
109
  ret += "<uri type='doc'>#{doc}</uri>\n" if doc
80
110
  ret += "<uri type='relaton'>#{relaton}</uri>\n" if relaton
81
- ret += "<docidentifier>#{docid}</docidentifier>\n"
111
+
112
+ ret += "<language>#{language}</language>\n"
113
+ ret += "<script>#{script}</script>\n"
114
+
115
+ if copyright_from
116
+ ret += "<copyright>"
117
+ ret += "<from>#{copyright_from}</from>\n" if copyright_from
118
+ ret += "<owner><organization><name>#{copyright_owner}</name></organization></owner>\n" if copyright_owner
119
+ ret += "</copyright>"
120
+ end
121
+
122
+ if contributor_author_role
123
+ ret += "<contributor>\n"
124
+ ret += "<role type='author'/>\n"
125
+ ret += "<organization><name>#{contributor_author_organization}</name></organization>\n"
126
+ ret += "</contributor>\n"
127
+ end
128
+
129
+ if contributor_publisher_role
130
+ ret += "<contributor>\n"
131
+ ret += "<role type='publisher'/>\n"
132
+ ret += "<organization><name>#{contributor_publisher_organization}</name></organization>\n"
133
+ ret += "</contributor>\n"
134
+ end
135
+
82
136
  ret += "<date type='#{datetype}'><on>#{revdate}</on></date>\n" if revdate
137
+ # ret += "<contributor><role type='author'/><organization><name>#{agency}</name></organization></contributor>" if agency
138
+ # ret += "<contributor><role type='publisher'/><organization><name>#{agency}</name></organization></contributor>" if agency
139
+ ret += "<edition>#{edition}</edition>\n" if edition
140
+ ret += "<language>#{language}</language>\n" if language
141
+ ret += "<script>#{script}</script>\n" if script
83
142
  ret += "<abstract>#{abstract}</abstract>\n" if abstract
84
143
  ret += "<status>#{stage}</status>\n" if stage
85
- ret += "<technical-committee>#{technical_committee}</technical-committee>\n" if technical_committee
144
+ ret += "<editorialgroup><technical-committee>#{technical_committee}</technical-committee></editorialgroup>\n" if technical_committee
86
145
  ret += "</bibdata>\n"
87
146
  end
88
147
 
89
148
  def to_h
90
149
  ATTRIBS.inject({}) do |acc, k|
91
150
  value = send(k)
92
- acc[k] = value unless value.nil?
151
+ acc[k.to_s] = value unless value.nil?
93
152
  acc
94
153
  end
95
154
  end
96
155
 
156
+ def to_yaml
157
+ to_h.to_yaml
158
+ end
159
+
97
160
  end
98
161
  end
@@ -8,9 +8,9 @@
8
8
  <div class="doc-identifier">
9
9
  <h{{ depth }}>
10
10
  {% if document.html == "" %}
11
- {{ document.docid }}
11
+ {{ document.docidentifier }}
12
12
  {% else %}
13
- <a href="{{ document.html }}">{{ document.docid }}</a>
13
+ <a href="{{ document.html }}">{{ document.docidentifier }}</a>
14
14
  {% endif %}
15
15
  </h{{ depth }}>
16
16
  </div>
@@ -0,0 +1,94 @@
1
+ require "fileutils"
2
+ require "relaton/bibdata"
3
+ require "relaton/bibcollection"
4
+ require "relaton/cli/xml_to_html_renderer"
5
+
6
+ module Relaton
7
+ module Cli
8
+ class BaseConvertor
9
+ def initialize(file, options = {})
10
+ @file = file
11
+ @options = options
12
+ @outdir = options.fetch(:outdir, nil)
13
+ @writable = options.fetch(:write, true)
14
+
15
+ install_dependencies(options[:require] || [])
16
+ end
17
+
18
+ def to_html
19
+ content = convert_to_html
20
+ write_to_a_file(content)
21
+ end
22
+
23
+ # Convert to HTML
24
+ #
25
+ # This interface expects us to provide the Relaton collection
26
+ # as XML/RXL, plus the necessary styles / templates, which are
27
+ # then used to convert that collection to HTML.
28
+ #
29
+ # @param file [String] Relaton collection file path
30
+ # @param style [String] Stylesheet file path for styles
31
+ # @param template [String] The liquid template directory
32
+ #
33
+ def self.to_html(file, style, template)
34
+ new(file, style: style, template: template, extension: "html").to_html
35
+ end
36
+
37
+ private
38
+
39
+ attr_reader :file, :outdir, :options, :writable
40
+
41
+ def default_ext
42
+ raise "Override this method"
43
+ end
44
+
45
+ def convert_to_html
46
+ Relaton::Cli::XmlToHtmlRenderer.render(
47
+ xml_content(file),
48
+ stylesheet: options[:style],
49
+ liquid_dir: options[:template],
50
+ )
51
+ end
52
+
53
+ def xml_content(file)
54
+ File.read(file, encoding: "utf-8")
55
+ end
56
+
57
+ def install_dependencies(dependencies)
58
+ dependencies.each { |dependency| require(dependency) }
59
+ end
60
+
61
+ def convert_and_write(content, format)
62
+ content = convert_content(content)
63
+ write_to_a_file(content.send(format.to_sym))
64
+ write_to_file_collection(content, format.to_sym)
65
+ end
66
+
67
+ def write_to_a_file(content, outfile = nil)
68
+ outfile ||= Pathname.new(file).sub_ext(extension).to_s
69
+ File.open(outfile, "w:utf-8") { |file| file.write(content) }
70
+ end
71
+
72
+ def write_to_file_collection(content, format)
73
+ if outdir && content.is_a?(Relaton::Bibcollection)
74
+ FileUtils.mkdir_p(outdir)
75
+
76
+ content.items_flattened.each do |item|
77
+ collection = collection_filename(item.docidentifier_code)
78
+ write_to_a_file(item.send(format.to_sym), collection)
79
+ end
80
+ end
81
+ end
82
+
83
+ def extension
84
+ @extension ||= [".", options.fetch(:extension, default_ext)].join
85
+ end
86
+
87
+ def collection_filename(identifier)
88
+ File.join(
89
+ outdir, [@options[:prefix], identifier, extension].compact.join("")
90
+ )
91
+ end
92
+ end
93
+ end
94
+ end
@@ -0,0 +1,83 @@
1
+ require "relaton/cli/relaton_file"
2
+ require "relaton/cli/xml_convertor"
3
+ require "relaton/cli/yaml_convertor"
4
+
5
+ module Relaton
6
+ module Cli
7
+ class Command < Thor
8
+ desc "fetch CODE", "Fetch Relaton XML for Standard identifier CODE"
9
+ option :type, aliases: :t, required: true, desc: "Type of standard to get bibliographic entry for"
10
+ option :year, aliases: :y, type: :numeric, desc: "Year the standard was published"
11
+
12
+ def fetch(code)
13
+ Relaton::Cli.relaton
14
+ say(fetch_document(code, options) || supported_type_message)
15
+ end
16
+
17
+ desc "extract Metanorma-XML-Directory Relaton-XML-Directory", "Extract Relaton XML from folder of Metanorma XML"
18
+ option :extension, aliases: :x, desc: "File extension of Relaton XML files, defaults to 'rxl'"
19
+
20
+ def extract(source_dir, outdir)
21
+ Relaton::Cli::RelatonFile.extract(source_dir, outdir, options)
22
+ end
23
+
24
+ desc "concatenate SOURCE-DIR COLLECTION-FILE", "Concatenate entries in DIRECTORY (containing Relaton-XML or YAML) into a Relaton Collection"
25
+ option :title, aliases: :t, desc: "Title of resulting Relaton collection"
26
+ option :organization, aliases: :g, desc: "Organization owner of Relaton collection"
27
+
28
+ def concatenate(source_dir, outfile)
29
+ Relaton::Cli::RelatonFile.concatenate(source_dir, outfile, options)
30
+ end
31
+
32
+ desc "yaml2xml YAML", "Convert Relaton YAML into Relaton Collection XML or separate files"
33
+ option :extension, aliases: :x, desc: "File extension of Relaton XML files, defaults to 'rxl'"
34
+ option :prefix, aliases: :p, desc: "Filename prefix of individual Relaton XML files, defaults to empty"
35
+ option :outdir, aliases: :o, desc: "Output to the specified directory with individual Relaton Bibdata XML files"
36
+ option :require, aliases: :r, type: :array, desc: "Require LIBRARY prior to execution"
37
+
38
+ def yaml2xml(filename)
39
+ Relaton::Cli::YAMLConvertor.to_xml(filename, options)
40
+ end
41
+
42
+ desc "xml2yaml XML", "Convert Relaton YAML into Relaton Bibcollection YAML (and separate files)"
43
+ option :extension, aliases: :x, desc: "File extension of Relaton YAML files, defaults to 'yaml'"
44
+ option :prefix, aliases: :p, desc: "Filename prefix of Relaton XML files, defaults to empty"
45
+ option :outdir, aliases: :o, desc: "Output to the specified directory with individual Relaton Bibdata YAML files"
46
+ option :require, aliases: :r, type: :array, desc: "Require LIBRARY prior to execution"
47
+
48
+ def xml2yaml(filename)
49
+ Relaton::Cli::XMLConvertor.to_yaml(filename, options)
50
+ end
51
+
52
+ desc "xml2html RELATON-INDEX-XML STYLESHEET LIQUID-TEMPLATE-DIR", "Convert Relaton Collection XML into HTML"
53
+
54
+ def xml2html(file, style, template)
55
+ Relaton::Cli::XMLConvertor.to_html(file, style, template)
56
+ end
57
+
58
+ desc "yaml2html YAML STYLESHEET LIQUID-TEMPLATE-DIR", "Concatenate Relaton YAML into HTML"
59
+
60
+ def yaml2html(file, style, template)
61
+ Relaton::Cli::YAMLConvertor.to_html(file, style, template)
62
+ end
63
+
64
+ private
65
+
66
+ def fetch_document(code, options)
67
+ if registered_types.include?(options[:type])
68
+ doc = Cli.relaton.fetch_std(code, options[:year], options[:type])
69
+ doc ? doc.to_xml : "No matching bibliographic entry found"
70
+ end
71
+ end
72
+
73
+ def supported_type_message
74
+ ["Recognised types:", registered_types.sort.join(", ")].join(" ")
75
+ end
76
+
77
+ def registered_types
78
+ @registered_types ||=
79
+ Relaton::Registry.instance.processors.each.map { |_n, pr| pr.prefix }
80
+ end
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,153 @@
1
+ require "nokogiri"
2
+ require "pathname"
3
+
4
+ module Relaton
5
+ module Cli
6
+ class RelatonFile
7
+ def initialize(source, options = {})
8
+ @source = source
9
+ @options = options
10
+ @outdir = options.fetch(:outdir, nil)
11
+ @outfile = options.fetch(:outfile, nil)
12
+ end
13
+
14
+ def extract
15
+ extract_and_write_to_files
16
+ end
17
+
18
+ def concatenate
19
+ write_to_file(bibcollection.to_xml)
20
+ end
21
+
22
+ # Extract files
23
+ #
24
+ # This interface expects us to provide a source directory, output
25
+ # directory and custom configuration options. Then it will extract
26
+ # Relaton XML files to the output directory from the source directory.
27
+ # During this process it will use custom options when available.
28
+ #
29
+ # @param source [Dir] The source directory for files
30
+ # @param outdir [Dir] The output directory for files
31
+ # @param options [Hash] Options as hash key value pair
32
+ #
33
+ def self.extract(source, outdir, options = {})
34
+ new(source, options.merge(outdir: outdir)).extract
35
+ end
36
+
37
+ # Concatenate files
38
+ #
39
+ # This interface expects us to provide a source directory, output
40
+ # file and custom configuration options. Normally, this expects the
41
+ # source directory to contain RXL files, but it also converts any
42
+ # YAML files to RXL and then finally combines those together.
43
+ #
44
+ # This interface also allows us to provide options like title and
45
+ # organization, and then it uses those details to generate the
46
+ # collection file.
47
+ #
48
+ # @param source [Dir] The source directory for files
49
+ # @param outfile [String] The collection output file
50
+ # @param options [Hash] Options as hash key value pair
51
+ #
52
+ def self.concatenate(source, outfile, options = {})
53
+ new(source, options.merge(outfile: outfile)).concatenate
54
+ end
55
+
56
+ private
57
+
58
+ attr_reader :source, :options, :outdir, :outfile
59
+
60
+ def bibcollection
61
+ ::Relaton::Bibcollection.new(
62
+ title: options[:title],
63
+ items: concatenate_files,
64
+ doctype: options[:doctype],
65
+ author: options[:organization],
66
+ )
67
+ end
68
+
69
+ def nokogiri_document(document, file = nil)
70
+ document ||= File.read(file, encoding: "utf-8")
71
+ Nokogiri.XML(document)
72
+ end
73
+
74
+ def extract_and_write_to_files
75
+ select_files_with("xml").each do |file|
76
+ xml = nokogiri_document(nil, file)
77
+ xml.remove_namespaces!
78
+
79
+ bib = xml.at("//bibdata") || next
80
+ bib.add_namespace(nil, "")
81
+
82
+ outfile = [outdir, build_filename(file, bib)].join("/")
83
+ write_to_file(bib.to_xml, outfile)
84
+ end
85
+ end
86
+
87
+ def concatenate_files
88
+ xml_files = [convert_rxl_to_xml, convert_yamls_to_xml]
89
+
90
+ xml_files.flatten.map do |xml|
91
+ doc = nokogiri_document(xml[:content])
92
+ bibdata_instance(doc, xml[:file]) if doc.root.name == "bibdata"
93
+ end.compact
94
+ end
95
+
96
+ def bibdata_instance(document, file)
97
+ document = clean_nokogiri_document(document)
98
+ bibdata = Relaton::Bibdata.from_xml(document.root)
99
+ build_bibdata_relaton(bibdata, file)
100
+
101
+ bibdata
102
+ end
103
+
104
+ def build_bibdata_relaton(bibdata, file)
105
+ ["xml", "pdf", "doc", "html"].each do |type|
106
+ file = Pathname.new(file).sub_ext(".#{type}")
107
+ bibdata.send("#{type}=", file) if File.file?(file)
108
+ end
109
+ end
110
+
111
+ # Force a namespace otherwise Nokogiri won't parse.
112
+ # The reason is we use Bibcollection's from_xml, but that one
113
+ # has an xmlns. We don't want to change the code for bibdata
114
+ # hence this hack #bibdata_doc.root['xmlns'] = "xmlns"
115
+ #
116
+ def clean_nokogiri_document(document)
117
+ document.remove_namespaces!
118
+ document.root.add_namespace(nil, "xmlns")
119
+ nokogiri_document(document.to_xml)
120
+ end
121
+
122
+ def convert_rxl_to_xml
123
+ select_files_with("{rxl}").map do |file|
124
+ { file: file, content: File.read(file, encoding: "utf-8") }
125
+ end
126
+ end
127
+
128
+ def convert_yamls_to_xml
129
+ select_files_with("yaml").map do |file|
130
+ { file: file, content: YAMLConvertor.to_xml(file, write: false) }
131
+ end
132
+ end
133
+
134
+ def select_files_with(extension)
135
+ files = File.join(source, "**", "*.#{extension}")
136
+ Dir[files].reject { |file| File.directory?(file) }
137
+ end
138
+
139
+ def write_to_file(content, output_file = nil)
140
+ output_file ||= outfile
141
+ File.open(output_file, "w:utf-8") { |file| file.write(content) }
142
+ end
143
+
144
+ def build_filename(file, document)
145
+ identifier = document&.at("./docidentifier")&.text ||
146
+ Pathname.new(File.basename(file, ".xml")).to_s
147
+
148
+ filename = identifier.sub(/^\s+/, "").sub(/\s+$/, "").gsub(/\s+/, "-")
149
+ [filename, options[:extension] || "rxl"].join(".")
150
+ end
151
+ end
152
+ end
153
+ end