relaton-cli 0.1.2 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,8 +1,9 @@
1
+ require "date"
1
2
 
2
3
  module Relaton
3
4
  class Bibdata
4
5
  ATTRIBS = %i[
5
- docid
6
+ docidentifier
6
7
  doctype
7
8
  title
8
9
  stage
@@ -16,6 +17,16 @@ module Relaton
16
17
  revdate
17
18
  abstract
18
19
  technical_committee
20
+ copyright_from
21
+ copyright_owner
22
+ contributor_author_role
23
+ contributor_author_organization
24
+ contributor_publisher_role
25
+ contributor_publisher_organization
26
+ language
27
+ script
28
+ edition
29
+ datetype
19
30
  ]
20
31
 
21
32
  attr_accessor *ATTRIBS
@@ -31,68 +42,120 @@ module Relaton
31
42
  options.each_pair do |k,v|
32
43
  send("#{k.to_s}=", v)
33
44
  end
45
+ self
46
+ end
34
47
 
35
- puts "*+"*30
36
- puts self.inspect
48
# Characters that are unsafe or awkward in file names.
# From http://gavinmiller.io/2016/creating-a-secure-sanitization-function/
FILENAME_BAD_CHARS = ['/', '\\', '?', '%', '*', ':', '|', '"', '<', '>', '.', ' '].freeze

# Returns the document identifier in a form that is safe to use as a file
# name: lower-cased, with every character listed in FILENAME_BAD_CHARS
# replaced by "-".
#
# @return [String] the sanitised identifier, or "" when docidentifier is nil
def docidentifier_code
  return "" if docidentifier.nil?

  # Regexp.union escapes each literal, so one pass replaces all bad characters.
  docidentifier.downcase.gsub(Regexp.union(FILENAME_BAD_CHARS), "-")
end
40
57
 
41
- def docid_code
42
- docid.downcase.gsub(/[\s\/]/, "-") || ""
58
DOC_NUMBER_REGEX = /([\w\/]+)\s+(\d+):?(\d*)/

# Extracts the numeric part of the document identifier, i.e. the first run of
# digits that follows a word (or slash-separated words) and whitespace.
#
# @return [Integer] the document number, or 999999 when the identifier is
#   nil or does not match DOC_NUMBER_REGEX
def doc_number
  matched = docidentifier&.match(DOC_NUMBER_REGEX)
  return 999999 unless matched

  matched[2].to_i
end
44
62
 
45
63
  def self.from_xml(source)
46
64
 
47
65
  # bib.relaton_xml_path = URI.escape("#{relaton_root}/#{id_code}.xml")
48
-
49
- datetype = source.at(ns("./date[@type]")).text
50
- revdate = source.at(ns("./date/on")).text
66
+ revdate = source.at(ns("./date[@type = 'published']")) ||
67
+ source.at(ns("./date[@type = 'circulated']")) || source.at(ns("./date"))
68
+ datetype = "circulated"
69
+ datetype = revdate["type"] if revdate
51
70
 
52
71
  new({
53
- uri: source.at(ns("./uri"))&.text,
72
+ uri: source.at(ns("./uri[not(@type)]"))&.text,
54
73
  xml: source.at(ns("./uri[@type='xml']"))&.text,
55
74
  pdf: source.at(ns("./uri[@type='pdf']"))&.text,
56
75
  html: source.at(ns("./uri[@type='html']"))&.text,
57
76
  relaton: source.at(ns("./uri[@type='relaton']"))&.text,
58
77
  doc: source.at(ns("./uri[@type='doc']"))&.text,
59
- docid: source.at(ns("./docidentifier"))&.text,
78
+ docidentifier: source.at(ns("./docidentifier"))&.text,
60
79
  title: source.at(ns("./title"))&.text,
61
80
  doctype: source.at(ns("./@type"))&.text,
62
81
  stage: source.at(ns("./status"))&.text,
63
- technical_committee: source.at(ns("./technical-committee"))&.text,
82
+ technical_committee: source.at(ns("./editorialgroup/technical-committee"))&.text,
64
83
  abstract: source.at(ns("./abstract"))&.text,
65
- revdate: Date.parse(revdate)
66
- # revdate TODO
84
+ revdate: revdate ? Date.parse(revdate.text) : nil,
85
+ language: source.at(ns("./language"))&.text,
86
+ script: source.at(ns("./script"))&.text,
87
+ edition: source.at(ns("./edition"))&.text,
88
+ copyright_from: source.at(ns("./copyright/from"))&.text,
89
+ copyright_owner: source.at(ns("./copyright/owner/organization/name"))&.text,
90
+ contributor_author_role: source.at(ns("./contributor/role[@type='author']"))&.text,
91
+ contributor_author_organization: source.at(ns("./contributor/role[@type='author']"))&.parent&.at(ns("./organization/name"))&.text,
92
+ contributor_publisher_role: source.at(ns("./contributor/role[@type='publisher']"))&.text,
93
+ contributor_publisher_organization: source.at(ns("./contributor/role[@type='publisher']"))&.parent&.at(ns("./organization/name"))&.text,
94
+ datetype: datetype
67
95
  })
68
96
  end
69
97
 
70
98
  def to_xml
71
- datetype = stage.casecmp("published") == 0 ? "published" : "updated"
99
+ #datetype = stage&.casecmp("published") == 0 ? "published" : "circulated"
72
100
 
73
101
  ret = "<bibdata type='#{doctype}'>\n"
102
+ ret += "<fetched>#{Date.today.to_s}</fetched>\n"
74
103
  ret += "<title>#{title}</title>\n"
104
+ ret += "<docidentifier>#{docidentifier}</docidentifier>\n" if docidentifier
75
105
  ret += "<uri>#{uri}</uri>\n" if uri
76
106
  ret += "<uri type='xml'>#{xml}</uri>\n" if xml
77
107
  ret += "<uri type='html'>#{html}</uri>\n" if html
78
108
  ret += "<uri type='pdf'>#{pdf}</uri>\n" if pdf
79
109
  ret += "<uri type='doc'>#{doc}</uri>\n" if doc
80
110
  ret += "<uri type='relaton'>#{relaton}</uri>\n" if relaton
81
- ret += "<docidentifier>#{docid}</docidentifier>\n"
111
+
112
+ ret += "<language>#{language}</language>\n"
113
+ ret += "<script>#{script}</script>\n"
114
+
115
+ if copyright_from
116
+ ret += "<copyright>"
117
+ ret += "<from>#{copyright_from}</from>\n" if copyright_from
118
+ ret += "<owner><organization><name>#{copyright_owner}</name></organization></owner>\n" if copyright_owner
119
+ ret += "</copyright>"
120
+ end
121
+
122
+ if contributor_author_role
123
+ ret += "<contributor>\n"
124
+ ret += "<role type='author'/>\n"
125
+ ret += "<organization><name>#{contributor_author_organization}</name></organization>\n"
126
+ ret += "</contributor>\n"
127
+ end
128
+
129
+ if contributor_publisher_role
130
+ ret += "<contributor>\n"
131
+ ret += "<role type='publisher'/>\n"
132
+ ret += "<organization><name>#{contributor_publisher_organization}</name></organization>\n"
133
+ ret += "</contributor>\n"
134
+ end
135
+
82
136
  ret += "<date type='#{datetype}'><on>#{revdate}</on></date>\n" if revdate
137
+ # ret += "<contributor><role type='author'/><organization><name>#{agency}</name></organization></contributor>" if agency
138
+ # ret += "<contributor><role type='publisher'/><organization><name>#{agency}</name></organization></contributor>" if agency
139
+ ret += "<edition>#{edition}</edition>\n" if edition
140
+ ret += "<language>#{language}</language>\n" if language
141
+ ret += "<script>#{script}</script>\n" if script
83
142
  ret += "<abstract>#{abstract}</abstract>\n" if abstract
84
143
  ret += "<status>#{stage}</status>\n" if stage
85
- ret += "<technical-committee>#{technical_committee}</technical-committee>\n" if technical_committee
144
+ ret += "<editorialgroup><technical-committee>#{technical_committee}</technical-committee></editorialgroup>\n" if technical_committee
86
145
  ret += "</bibdata>\n"
87
146
  end
88
147
 
89
148
  def to_h
90
149
  ATTRIBS.inject({}) do |acc, k|
91
150
  value = send(k)
92
- acc[k] = value unless value.nil?
151
+ acc[k.to_s] = value unless value.nil?
93
152
  acc
94
153
  end
95
154
  end
96
155
 
156
# Serialises the non-nil attributes (see #to_h) as a YAML document.
#
# @return [String] YAML text
def to_yaml
  # Loaded here so the method does not depend on another file having
  # required yaml first; a repeated require is a no-op.
  require "yaml"
  YAML.dump(to_h)
end
159
+
97
160
  end
98
161
  end
@@ -8,9 +8,9 @@
8
8
  <div class="doc-identifier">
9
9
  <h{{ depth }}>
10
10
  {% if document.html == "" %}
11
- {{ document.docid }}
11
+ {{ document.docidentifier }}
12
12
  {% else %}
13
- <a href="{{ document.html }}">{{ document.docid }}</a>
13
+ <a href="{{ document.html }}">{{ document.docidentifier }}</a>
14
14
  {% endif %}
15
15
  </h{{ depth }}>
16
16
  </div>
@@ -0,0 +1,94 @@
1
+ require "fileutils"
2
+ require "relaton/bibdata"
3
+ require "relaton/bibcollection"
4
+ require "relaton/cli/xml_to_html_renderer"
5
+
6
module Relaton
  module Cli
    # Behaviour shared by the concrete convertors: rendering a Relaton
    # collection to HTML and writing converted content to files.
    #
    # Subclasses are expected to provide +default_ext+ and +convert_content+.
    class BaseConvertor
      # @param file [String] path of the file to convert
      # @param options [Hash] conversion options. Keys read by this class:
      #   :outdir, :write, :require, :style, :template, :extension, :prefix
      def initialize(file, options = {})
        @file = file
        @options = options
        @outdir = options.fetch(:outdir, nil)
        @writable = options.fetch(:write, true)

        install_dependencies(options[:require] || [])
      end

      # Renders the source file as HTML and writes it next to the source,
      # using the configured extension.
      def to_html
        content = convert_to_html
        write_to_a_file(content)
      end

      # Convert to HTML
      #
      # This interface expects a Relaton collection as XML/RXL together with
      # the necessary styles / templates, and uses them to convert that
      # collection to HTML.
      #
      # @param file [String] Relaton collection file path
      # @param style [String] Stylesheet file path for styles
      # @param template [String] The liquid template directory
      #
      def self.to_html(file, style, template)
        new(file, style: style, template: template, extension: "html").to_html
      end

      private

      attr_reader :file, :outdir, :options, :writable

      # Extension (without the dot) used when options carry no :extension.
      def default_ext
        raise "Override this method"
      end

      def convert_to_html
        Relaton::Cli::XmlToHtmlRenderer.render(
          xml_content(file),
          stylesheet: options[:style],
          liquid_dir: options[:template],
        )
      end

      def xml_content(file)
        File.read(file, encoding: "utf-8")
      end

      # Requires each library named in the :require option.
      def install_dependencies(dependencies)
        dependencies.each { |dependency| require(dependency) }
      end

      # Converts +content+ (via the subclass's +convert_content+), writes the
      # whole result to one file and, when an output directory is set, each
      # collection item to its own file.
      def convert_and_write(content, format)
        content = convert_content(content)
        write_to_a_file(content.send(format.to_sym))
        write_to_file_collection(content, format.to_sym)
      end

      # Writes +content+ to +outfile+; by default the source path with its
      # extension replaced by the configured one.
      def write_to_a_file(content, outfile = nil)
        outfile ||= Pathname.new(file).sub_ext(extension).to_s
        File.open(outfile, "w:utf-8") { |output| output.write(content) }
      end

      def write_to_file_collection(content, format)
        return unless outdir && content.is_a?(Relaton::Bibcollection)

        FileUtils.mkdir_p(outdir)

        content.items_flattened.each do |item|
          collection = collection_filename(item.docidentifier_code)
          write_to_a_file(item.send(format.to_sym), collection)
        end
      end

      # The output extension including its leading dot.
      def extension
        # Block form: default_ext is consulted only when no :extension was
        # given, so an explicit extension works even where default_ext raises.
        @extension ||= [".", options.fetch(:extension) { default_ext }].join
      end

      def collection_filename(identifier)
        File.join(
          outdir, [@options[:prefix], identifier, extension].compact.join("")
        )
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
+ require "relaton/cli/relaton_file"
2
+ require "relaton/cli/xml_convertor"
3
+ require "relaton/cli/yaml_convertor"
4
+
5
module Relaton
  module Cli
    # Thor command set of the relaton executable. Each public method is one
    # sub-command; the +desc+ / +option+ declarations above it define its
    # help text and flags.
    class Command < Thor
      desc "fetch CODE", "Fetch Relaton XML for Standard identifier CODE"
      option :type, aliases: :t, required: true, desc: "Type of standard to get bibliographic entry for"
      option :year, aliases: :y, type: :numeric, desc: "Year the standard was published"

      # Prints the XML of the fetched document, or the list of recognised
      # types when the requested type is not registered.
      def fetch(code)
        # Called for its side effects only; presumably sets up the processor
        # registry before the types are inspected — confirm in Relaton::Cli.
        Relaton::Cli.relaton
        say(fetch_document(code, options) || supported_type_message)
      end

      desc "extract Metanorma-XML-Directory Relaton-XML-Directory", "Extract Relaton XML from folder of Metanorma XML"
      option :extension, aliases: :x, desc: "File extension of Relaton XML files, defaults to 'rxl'"

      def extract(source_dir, outdir)
        Relaton::Cli::RelatonFile.extract(source_dir, outdir, options)
      end

      desc "concatenate SOURCE-DIR COLLECTION-FILE", "Concatenate entries in DIRECTORY (containing Relaton-XML or YAML) into a Relaton Collection"
      option :title, aliases: :t, desc: "Title of resulting Relaton collection"
      option :organization, aliases: :g, desc: "Organization owner of Relaton collection"

      def concatenate(source_dir, outfile)
        Relaton::Cli::RelatonFile.concatenate(source_dir, outfile, options)
      end

      desc "yaml2xml YAML", "Convert Relaton YAML into Relaton Collection XML or separate files"
      option :extension, aliases: :x, desc: "File extension of Relaton XML files, defaults to 'rxl'"
      option :prefix, aliases: :p, desc: "Filename prefix of individual Relaton XML files, defaults to empty"
      option :outdir, aliases: :o, desc: "Output to the specified directory with individual Relaton Bibdata XML files"
      option :require, aliases: :r, type: :array, desc: "Require LIBRARY prior to execution"

      def yaml2xml(filename)
        Relaton::Cli::YAMLConvertor.to_xml(filename, options)
      end

      # The input of this command is XML, so the description says so.
      desc "xml2yaml XML", "Convert Relaton XML into Relaton Bibcollection YAML (and separate files)"
      option :extension, aliases: :x, desc: "File extension of Relaton YAML files, defaults to 'yaml'"
      option :prefix, aliases: :p, desc: "Filename prefix of Relaton XML files, defaults to empty"
      option :outdir, aliases: :o, desc: "Output to the specified directory with individual Relaton Bibdata YAML files"
      option :require, aliases: :r, type: :array, desc: "Require LIBRARY prior to execution"

      def xml2yaml(filename)
        Relaton::Cli::XMLConvertor.to_yaml(filename, options)
      end

      desc "xml2html RELATON-INDEX-XML STYLESHEET LIQUID-TEMPLATE-DIR", "Convert Relaton Collection XML into HTML"

      def xml2html(file, style, template)
        Relaton::Cli::XMLConvertor.to_html(file, style, template)
      end

      desc "yaml2html YAML STYLESHEET LIQUID-TEMPLATE-DIR", "Concatenate Relaton YAML into HTML"

      def yaml2html(file, style, template)
        Relaton::Cli::YAMLConvertor.to_html(file, style, template)
      end

      private

      # Returns the document XML, a "not found" message, or nil when the
      # requested type is not one of the registered types.
      def fetch_document(code, options)
        return unless registered_types.include?(options[:type])

        doc = Cli.relaton.fetch_std(code, options[:year], options[:type])
        doc ? doc.to_xml : "No matching bibliographic entry found"
      end

      def supported_type_message
        ["Recognised types:", registered_types.sort.join(", ")].join(" ")
      end

      # Prefixes of all processors known to the registry (memoised).
      def registered_types
        @registered_types ||=
          Relaton::Registry.instance.processors.each.map { |_n, pr| pr.prefix }
      end
    end
  end
end
@@ -0,0 +1,153 @@
1
+ require "nokogiri"
2
+ require "pathname"
3
+
4
module Relaton
  module Cli
    # Extracts Relaton XML from a directory of source XML files and
    # concatenates Relaton XML / YAML files into a single collection.
    class RelatonFile
      # @param source [String] directory that is searched for input files
      # @param options [Hash] keys read by this class: :outdir, :outfile,
      #   :title, :doctype, :organization, :extension
      def initialize(source, options = {})
        @source = source
        @options = options
        @outdir = options.fetch(:outdir, nil)
        @outfile = options.fetch(:outfile, nil)
      end

      def extract
        extract_and_write_to_files
      end

      def concatenate
        write_to_file(bibcollection.to_xml)
      end

      # Extract files
      #
      # This interface expects a source directory, an output directory and
      # custom configuration options. It extracts Relaton XML files from the
      # source directory into the output directory, using the custom options
      # when available.
      #
      # @param source [Dir] The source directory for files
      # @param outdir [Dir] The output directory for files
      # @param options [Hash] Options as hash key value pair
      #
      def self.extract(source, outdir, options = {})
        new(source, options.merge(outdir: outdir)).extract
      end

      # Concatenate files
      #
      # This interface expects a source directory, an output file and custom
      # configuration options. Normally the source directory contains RXL
      # files, but any YAML files are also converted to RXL and then
      # everything is combined together.
      #
      # Options such as title and organization may be provided; they are
      # used when generating the collection file.
      #
      # @param source [Dir] The source directory for files
      # @param outfile [String] The collection output file
      # @param options [Hash] Options as hash key value pair
      #
      def self.concatenate(source, outfile, options = {})
        new(source, options.merge(outfile: outfile)).concatenate
      end

      private

      attr_reader :source, :options, :outdir, :outfile

      def bibcollection
        ::Relaton::Bibcollection.new(
          title: options[:title],
          items: concatenate_files,
          doctype: options[:doctype],
          author: options[:organization],
        )
      end

      # Parses +document+, or the content of +file+ when +document+ is nil.
      def nokogiri_document(document, file = nil)
        document ||= File.read(file, encoding: "utf-8")
        Nokogiri.XML(document)
      end

      def extract_and_write_to_files
        select_files_with("xml").each do |file|
          xml = nokogiri_document(nil, file)
          xml.remove_namespaces!

          bib = xml.at("//bibdata") || next
          bib.add_namespace(nil, "")

          # Create the output directory on demand, as the convertors do.
          Pathname.new(outdir).mkpath if outdir
          target = [outdir, build_filename(file, bib)].join("/")
          write_to_file(bib.to_xml, target)
        end
      end

      def concatenate_files
        xml_files = [convert_rxl_to_xml, convert_yamls_to_xml]

        xml_files.flatten.map do |xml|
          doc = nokogiri_document(xml[:content])
          # An empty or unparsable document has no root; skip it.
          bibdata_instance(doc, xml[:file]) if doc.root&.name == "bibdata"
        end.compact
      end

      def bibdata_instance(document, file)
        document = clean_nokogiri_document(document)
        bibdata = Relaton::Bibdata.from_xml(document.root)
        build_bibdata_relaton(bibdata, file)

        bibdata
      end

      # Points the bibdata at sibling files of +file+ (same name, other
      # extension) that exist on disk.
      def build_bibdata_relaton(bibdata, file)
        base = Pathname.new(file)

        %w[xml pdf doc html].each do |type|
          sibling = base.sub_ext(".#{type}")
          # Stored as a String so that serialising the bibdata does not
          # emit a Pathname object.
          bibdata.send("#{type}=", sibling.to_s) if sibling.file?
        end
      end

      # Force a namespace otherwise Nokogiri won't parse.
      # The reason is we use Bibcollection's from_xml, but that one
      # has an xmlns. We don't want to change the code for bibdata
      # hence this hack #bibdata_doc.root['xmlns'] = "xmlns"
      #
      def clean_nokogiri_document(document)
        document.remove_namespaces!
        document.root.add_namespace(nil, "xmlns")
        nokogiri_document(document.to_xml)
      end

      def convert_rxl_to_xml
        select_files_with("{rxl}").map do |file|
          { file: file, content: File.read(file, encoding: "utf-8") }
        end
      end

      def convert_yamls_to_xml
        select_files_with("yaml").map do |file|
          { file: file, content: YAMLConvertor.to_xml(file, write: false) }
        end
      end

      # All regular files below the source directory with the extension.
      def select_files_with(extension)
        files = File.join(source, "**", "*.#{extension}")
        Dir[files].reject { |file| File.directory?(file) }
      end

      def write_to_file(content, output_file = nil)
        output_file ||= outfile
        File.open(output_file, "w:utf-8") { |output| output.write(content) }
      end

      # Output file name for an extracted document: its docidentifier, or
      # the source file's base name when there is no usable identifier.
      def build_filename(file, document)
        identifier = document&.at("./docidentifier")&.text.to_s.strip
        identifier = File.basename(file, ".xml").strip if identifier.empty?

        # Whitespace and "/" both become "-" so the name stays one path
        # component (an identifier such as "ISO/IEC 1" has no sub-directory).
        filename = identifier.gsub(%r{[\s/]+}, "-")
        [filename, options[:extension] || "rxl"].join(".")
      end
    end
  end
end