stepmod-utils 0.3.2 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -11
- data/.github/workflows/release.yml +7 -5
- data/.hound.yml +5 -0
- data/.rubocop.yml +10 -0
- data/Gemfile +3 -3
- data/Rakefile +1 -1
- data/exe/stepmod-annotate-all +17 -11
- data/exe/stepmod-build-resource-docs-cache +8 -7
- data/exe/stepmod-convert-express-description +6 -4
- data/exe/stepmod-convert-express-resource +6 -4
- data/exe/stepmod-extract-terms +69 -236
- data/exe/stepmod-find-express-files +8 -7
- data/lib/stepmod/utils/bibdata.rb +31 -35
- data/lib/stepmod/utils/cleaner.rb +3 -3
- data/lib/stepmod/utils/concept.rb +86 -30
- data/lib/stepmod/utils/converters/a.rb +7 -10
- data/lib/stepmod/utils/converters/blockquote.rb +4 -4
- data/lib/stepmod/utils/converters/br.rb +1 -1
- data/lib/stepmod/utils/converters/bypass.rb +33 -33
- data/lib/stepmod/utils/converters/clause_ref.rb +3 -3
- data/lib/stepmod/utils/converters/code.rb +1 -1
- data/lib/stepmod/utils/converters/comment.rb +5 -3
- data/lib/stepmod/utils/converters/dd.rb +2 -2
- data/lib/stepmod/utils/converters/def.rb +30 -22
- data/lib/stepmod/utils/converters/drop.rb +2 -2
- data/lib/stepmod/utils/converters/dt.rb +2 -2
- data/lib/stepmod/utils/converters/em.rb +1 -1
- data/lib/stepmod/utils/converters/em_express_description.rb +1 -1
- data/lib/stepmod/utils/converters/eqn.rb +28 -26
- data/lib/stepmod/utils/converters/example.rb +12 -2
- data/lib/stepmod/utils/converters/express_g.rb +10 -9
- data/lib/stepmod/utils/converters/express_ref.rb +2 -2
- data/lib/stepmod/utils/converters/express_ref_express_description.rb +3 -2
- data/lib/stepmod/utils/converters/ext_description.rb +4 -2
- data/lib/stepmod/utils/converters/figure.rb +20 -0
- data/lib/stepmod/utils/converters/fund_cons.rb +1 -1
- data/lib/stepmod/utils/converters/head.rb +3 -2
- data/lib/stepmod/utils/converters/hr.rb +2 -2
- data/lib/stepmod/utils/converters/ignore.rb +3 -3
- data/lib/stepmod/utils/converters/introduction.rb +1 -1
- data/lib/stepmod/utils/converters/module_ref.rb +11 -7
- data/lib/stepmod/utils/converters/note.rb +12 -2
- data/lib/stepmod/utils/converters/ol.rb +5 -5
- data/lib/stepmod/utils/converters/p.rb +2 -2
- data/lib/stepmod/utils/converters/pass_through.rb +2 -2
- data/lib/stepmod/utils/converters/q.rb +1 -1
- data/lib/stepmod/utils/converters/schema.rb +1 -1
- data/lib/stepmod/utils/converters/stem.rb +1 -1
- data/lib/stepmod/utils/converters/stepmod_ext_description.rb +30 -7
- data/lib/stepmod/utils/converters/strong.rb +5 -4
- data/lib/stepmod/utils/converters/sub.rb +2 -2
- data/lib/stepmod/utils/converters/sup.rb +2 -2
- data/lib/stepmod/utils/converters/synonym.rb +2 -2
- data/lib/stepmod/utils/converters/table.rb +7 -9
- data/lib/stepmod/utils/converters/term.rb +18 -9
- data/lib/stepmod/utils/converters/text.rb +12 -14
- data/lib/stepmod/utils/converters/uof.rb +11 -7
- data/lib/stepmod/utils/html_to_asciimath.rb +91 -90
- data/lib/stepmod/utils/smrl_description_converter.rb +34 -33
- data/lib/stepmod/utils/smrl_resource_converter.rb +51 -50
- data/lib/stepmod/utils/stepmod_definition_converter.rb +39 -37
- data/lib/stepmod/utils/stepmod_file_annotator.rb +26 -19
- data/lib/stepmod/utils/terms_extractor.rb +378 -0
- data/lib/stepmod/utils/version.rb +1 -1
- data/stepmod-utils.gemspec +12 -6
- metadata +80 -20
|
@@ -1,40 +1,40 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require
|
|
4
|
-
require
|
|
5
|
-
require
|
|
6
|
-
require
|
|
7
|
-
require
|
|
8
|
-
require
|
|
9
|
-
require
|
|
10
|
-
require
|
|
11
|
-
require
|
|
12
|
-
require
|
|
13
|
-
require
|
|
14
|
-
require
|
|
15
|
-
require
|
|
16
|
-
require
|
|
17
|
-
require
|
|
18
|
-
require
|
|
19
|
-
|
|
20
|
-
require 'reverse_adoc/converters/a'
|
|
21
|
-
require 'reverse_adoc/converters/blockquote'
|
|
22
|
-
require 'reverse_adoc/converters/bypass'
|
|
23
|
-
require 'reverse_adoc/converters/br'
|
|
24
|
-
require 'reverse_adoc/converters/code'
|
|
25
|
-
require 'reverse_adoc/converters/drop'
|
|
26
|
-
require 'reverse_adoc/converters/head'
|
|
27
|
-
require 'reverse_adoc/converters/hr'
|
|
28
|
-
require 'reverse_adoc/converters/ignore'
|
|
29
|
-
require 'reverse_adoc/converters/li'
|
|
30
|
-
require 'reverse_adoc/converters/p'
|
|
31
|
-
require 'reverse_adoc/converters/pass_through'
|
|
32
|
-
require 'reverse_adoc/converters/q'
|
|
33
|
-
require 'reverse_adoc/converters/strong'
|
|
34
|
-
require 'reverse_adoc/converters/sup'
|
|
35
|
-
require 'reverse_adoc/converters/sub'
|
|
36
|
-
require 'reverse_adoc/converters/text'
|
|
3
|
+
require "reverse_adoc"
|
|
4
|
+
require "stepmod/utils/converters/arm"
|
|
5
|
+
require "stepmod/utils/converters/clause_ref"
|
|
6
|
+
require "stepmod/utils/converters/express_ref"
|
|
7
|
+
require "stepmod/utils/converters/module_ref"
|
|
8
|
+
require "stepmod/utils/converters/def"
|
|
9
|
+
require "stepmod/utils/converters/definition"
|
|
10
|
+
require "stepmod/utils/converters/em"
|
|
11
|
+
require "stepmod/utils/converters/example"
|
|
12
|
+
require "stepmod/utils/converters/note"
|
|
13
|
+
require "stepmod/utils/converters/ol"
|
|
14
|
+
require "stepmod/utils/converters/stem"
|
|
15
|
+
require "stepmod/utils/converters/stepmod_ext_description"
|
|
16
|
+
require "stepmod/utils/converters/term"
|
|
17
|
+
require "stepmod/utils/converters/synonym"
|
|
18
|
+
require "stepmod/utils/converters/uof"
|
|
19
|
+
require "stepmod/utils/converters/figure"
|
|
37
20
|
|
|
21
|
+
require "reverse_adoc/converters/a"
|
|
22
|
+
require "reverse_adoc/converters/blockquote"
|
|
23
|
+
require "reverse_adoc/converters/bypass"
|
|
24
|
+
require "reverse_adoc/converters/br"
|
|
25
|
+
require "reverse_adoc/converters/code"
|
|
26
|
+
require "reverse_adoc/converters/drop"
|
|
27
|
+
require "reverse_adoc/converters/head"
|
|
28
|
+
require "reverse_adoc/converters/hr"
|
|
29
|
+
require "reverse_adoc/converters/ignore"
|
|
30
|
+
require "reverse_adoc/converters/li"
|
|
31
|
+
require "reverse_adoc/converters/p"
|
|
32
|
+
require "reverse_adoc/converters/pass_through"
|
|
33
|
+
require "reverse_adoc/converters/q"
|
|
34
|
+
require "reverse_adoc/converters/strong"
|
|
35
|
+
require "reverse_adoc/converters/sup"
|
|
36
|
+
require "reverse_adoc/converters/sub"
|
|
37
|
+
require "reverse_adoc/converters/text"
|
|
38
38
|
|
|
39
39
|
module Stepmod
|
|
40
40
|
module Utils
|
|
@@ -49,11 +49,13 @@ module Stepmod
|
|
|
49
49
|
input
|
|
50
50
|
end
|
|
51
51
|
|
|
52
|
-
return
|
|
52
|
+
return "" unless root
|
|
53
53
|
|
|
54
54
|
ReverseAdoc.config.with(options) do
|
|
55
|
-
result = ReverseAdoc::Converters.lookup(root.name).convert(root
|
|
56
|
-
|
|
55
|
+
result = ReverseAdoc::Converters.lookup(root.name).convert(root,
|
|
56
|
+
options)
|
|
57
|
+
return "" unless result
|
|
58
|
+
|
|
57
59
|
ReverseAdoc.cleaner.tidy(result.dup)
|
|
58
60
|
end
|
|
59
61
|
end
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
require
|
|
2
|
-
require
|
|
3
|
-
require
|
|
1
|
+
require "json"
|
|
2
|
+
require "stepmod/utils/smrl_description_converter"
|
|
3
|
+
require "stepmod/utils/smrl_resource_converter"
|
|
4
4
|
|
|
5
5
|
module Stepmod
|
|
6
6
|
module Utils
|
|
@@ -17,35 +17,41 @@ module Stepmod
|
|
|
17
17
|
|
|
18
18
|
def call
|
|
19
19
|
match = File.basename(express_file).match('^(arm|mim|bom)\.exp$')
|
|
20
|
-
descriptions_base = match ? "#{match.captures[0]}_descriptions.xml" :
|
|
21
|
-
descriptions_file = File.join(File.dirname(express_file),
|
|
20
|
+
descriptions_base = match ? "#{match.captures[0]}_descriptions.xml" : "descriptions.xml"
|
|
21
|
+
descriptions_file = File.join(File.dirname(express_file),
|
|
22
|
+
descriptions_base)
|
|
22
23
|
output_express = File.read(express_file)
|
|
23
24
|
resource_docs_cache = JSON.parse(File.read(resource_docs_cache_file))
|
|
24
25
|
|
|
25
26
|
if File.exists?(descriptions_file)
|
|
26
27
|
descriptions = Nokogiri::XML(File.read(descriptions_file)).root
|
|
27
28
|
added_resource_descriptions = {}
|
|
28
|
-
descriptions.xpath(
|
|
29
|
+
descriptions.xpath("ext_description").each do |description|
|
|
29
30
|
# Add base resource from linked path if exists, eg "language_schema.language.wr:WR1" -> "language_schema"
|
|
30
|
-
base_linked = description[
|
|
31
|
+
base_linked = description["linkend"].to_s.split(".").first
|
|
31
32
|
if added_resource_descriptions[base_linked].nil?
|
|
32
|
-
base_reource_doc_dir = resource_docs_cache[description[
|
|
33
|
+
base_reource_doc_dir = resource_docs_cache[description["linkend"].to_s.split(".").first]
|
|
33
34
|
if base_reource_doc_dir
|
|
34
|
-
output_express << convert_from_resource_file(
|
|
35
|
+
output_express << convert_from_resource_file(
|
|
36
|
+
base_reource_doc_dir, stepmod_dir, base_linked, descriptions_file
|
|
37
|
+
)
|
|
35
38
|
end
|
|
36
39
|
added_resource_descriptions[base_linked] = true
|
|
37
40
|
end
|
|
38
|
-
resource_docs_dir = resource_docs_cache[description[
|
|
41
|
+
resource_docs_dir = resource_docs_cache[description["linkend"]]
|
|
39
42
|
# Convert content description
|
|
40
43
|
# when a schema description is available from resource.xml and also descriptions.xml, the description from resource.xml is only used.
|
|
41
44
|
# https://github.com/metanorma/annotated-express/issues/32#issuecomment-792609078
|
|
42
45
|
if description.text.strip.length.positive? && resource_docs_dir.nil?
|
|
43
|
-
output_express << convert_from_description_text(
|
|
46
|
+
output_express << convert_from_description_text(
|
|
47
|
+
descriptions_file, description
|
|
48
|
+
)
|
|
44
49
|
end
|
|
45
50
|
# Add converted description from exact linked path
|
|
46
|
-
if resource_docs_dir && added_resource_descriptions[description[
|
|
47
|
-
output_express << convert_from_resource_file(resource_docs_dir,
|
|
48
|
-
|
|
51
|
+
if resource_docs_dir && added_resource_descriptions[description["linkend"]].nil?
|
|
52
|
+
output_express << convert_from_resource_file(resource_docs_dir,
|
|
53
|
+
stepmod_dir, description["linkend"], descriptions_file)
|
|
54
|
+
added_resource_descriptions[description["linkend"]] = true
|
|
49
55
|
end
|
|
50
56
|
end
|
|
51
57
|
end
|
|
@@ -57,20 +63,21 @@ module Stepmod
|
|
|
57
63
|
|
|
58
64
|
# Converts a single description node into annotated text.
#
# The node is interpolated into an <ext_descriptions> wrapper element and the
# resulting string is handed to Stepmod::Utils::SmrlDescriptionConverter.
# The conversion runs with the working directory switched to the directory
# that contains +descriptions_file+ (presumably so relative references inside
# the description resolve against it -- confirm against the converter).
#
# @param descriptions_file [String] path of the descriptions XML file
# @param description [#to_s] the description node to convert
# @return [String] the converted text, prefixed with a newline
def convert_from_description_text(descriptions_file, description)
  base_dir = File.dirname(descriptions_file)
  Dir.chdir(base_dir) do
    wrapped_xml = "<ext_descriptions>#{description}</ext_descriptions>"
    converted = Stepmod::Utils::SmrlDescriptionConverter.convert(wrapped_xml)
    "\n#{converted}"
  end
end
|
|
64
70
|
|
|
65
71
|
# Converts the <schema> entry named +linked+ from a resource document.
#
# Reads <stepmod_dir>/data/resource_docs/<resource_docs_dir>/resource.xml,
# prints that path to standard output, selects the schema element(s) whose
# name attribute equals +linked+, wraps them in a <resource> element and
# passes the result to Stepmod::Utils::SmrlResourceConverter. The conversion
# runs with the working directory switched to the directory that contains
# +descriptions_file+.
#
# @param resource_docs_dir [String] directory name under data/resource_docs
# @param stepmod_dir [String] root of the STEPmod checkout
# @param linked [String] schema name to look up in resource.xml
# @param descriptions_file [String] path of the descriptions XML file
# @return [String] the converted text, prefixed with a newline
def convert_from_resource_file(resource_docs_dir, stepmod_dir, linked, descriptions_file)
  resource_xml_path = File.join(
    stepmod_dir,
    "data/resource_docs",
    resource_docs_dir,
    "resource.xml",
  )
  puts(resource_xml_path)

  document_root = Nokogiri::XML(File.read(resource_xml_path)).root
  matching_schema = document_root.xpath("schema[@name='#{linked}']")

  Dir.chdir(File.dirname(descriptions_file)) do
    wrapped_xml = "<resource>#{matching_schema}</resource>"
    converted = Stepmod::Utils::SmrlResourceConverter.convert(wrapped_xml)
    "\n#{converted}"
  end
end
|
|
76
83
|
end
|
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
require "stepmod/utils/stepmod_definition_converter"
|
|
2
|
+
require "stepmod/utils/bibdata"
|
|
3
|
+
require "stepmod/utils/concept"
|
|
4
|
+
require "glossarist"
|
|
5
|
+
require "securerandom"
|
|
6
|
+
|
|
7
|
+
ReverseAdoc.config.unknown_tags = :bypass
|
|
8
|
+
|
|
9
|
+
module Stepmod
|
|
10
|
+
module Utils
|
|
11
|
+
# Walks a STEPmod checkout and collects terminology concepts from it.
#
# The documents listed in repository_index.xml (modules, resource docs,
# business object models and application protocols) are parsed, their
# <definition> elements and the <ext_description> elements of the related
# description files are turned into concepts via Stepmod::Utils::Concept, and
# the results are grouped into the collections exposed by the readers below.
class TermsExtractor
  # TODO: we may want a command line option to override this in the future
  ACCEPTED_STAGES = %w(IS DIS FDIS TS).freeze
  WITHDRAWN_STATUS = "withdrawn".freeze

  attr_reader :stepmod_path,
              :stepmod_dir,
              :general_concepts,
              :resource_concepts,
              :parsed_bibliography,
              :encountered_terms,
              :cvs_mode,
              :part_concepts,
              :part_resources,
              :part_modules,
              :stdout

  # Convenience entry point: builds an extractor and runs it.
  #
  # @param stepmod_dir [String] root directory of the STEPmod checkout
  # @param stdout [#puts] stream that receives log lines
  # @return [Array] see {#call}
  def self.call(stepmod_dir, stdout = $stdout)
    new(stepmod_dir, stdout).call
  end

  # @param stepmod_dir [String] root directory of the STEPmod checkout; it
  #   must exist because it is resolved with Pathname#realpath
  # @param stdout [#puts] stream that receives log lines
  def initialize(stepmod_dir, stdout)
    @stdout = stdout
    @stepmod_dir = stepmod_dir
    @stepmod_path = Pathname.new(stepmod_dir).realpath
    @general_concepts = Glossarist::Collection.new
    @resource_concepts = Glossarist::Collection.new
    @parsed_bibliography = []
    @part_concepts = []
    @part_resources = []
    @part_modules = []
    @encountered_terms = {}
  end

  # Writes +message+ to the configured stream with a "[stepmod-utils]" prefix.
  #
  # @param message [String] text to log
  # @return [void]
  def log(message)
    stdout.puts "[stepmod-utils] #{message}"
  end

  # Tells whether the document's part number belongs to the "special" set:
  # parts 41-47, 51, and 56 through 112 inclusive.
  #
  # The range is matched with `when 56..112` (Range#===). Wrapping it in an
  # array, as in `when [56..112]`, compares the integer against an Array and
  # never matches.
  #
  # @param bibdata [#part] bibliographic data; +part+ is coerced with #to_i
  # @return [Boolean]
  def term_special_category(bibdata)
    case bibdata.part.to_i
    when 41, 42, 43, 44, 45, 46, 47, 51
      true
    when 56..112
      true
    else
      false
    end
  end

  # Runs the extraction over every non-withdrawn document listed in
  # repository_index.xml whose XML file exists on disk.
  #
  # @return [Array] six elements, in this order: general_concepts,
  #   resource_concepts, parsed_bibliography, part_concepts, part_resources,
  #   part_modules
  def call
    # If we are using the stepmod CVS repository, provide the revision number per file
    # (File.exist?/Dir.exist? are used throughout: the `exists?` aliases were
    # removed in Ruby 3.2.)
    @cvs_mode = if Dir.exist?(stepmod_path.join("CVS"))
                  require "ptools"
                  # ptools provides File.which
                  File.which("cvs")
                end

    log "INFO: STEPmod directory set to #{stepmod_dir}."

    # Multi-line messages use adjacent string literals; a backslash-newline
    # inside a single literal would embed the source indentation in the output.
    if cvs_mode
      log "INFO: STEPmod directory is a " \
          "CVS repository and will detect revisions."
      log "INFO: [CVS] Detecting file revisions can be slow, " \
          "please be patient!"
    else
      log "INFO: STEPmod directory is not a CVS repository, " \
          "skipping revision detection."
    end

    log "INFO: Detecting paths..."

    repo_index = Nokogiri::XML(File.read(stepmod_path.join("repository_index.xml"))).root

    files = []

    # add module paths
    repo_index.xpath("//module").each do |x|
      next if x['status'] == WITHDRAWN_STATUS

      path = Pathname.new("#{stepmod_dir}/modules/#{x['name']}/module.xml")
      files << path if File.exist? path
    end

    # add resource_docs paths
    repo_index.xpath("//resource_doc").each do |x|
      next if x['status'] == WITHDRAWN_STATUS

      path = Pathname.new("#{stepmod_dir}/resource_docs/#{x['name']}/resource.xml")
      files << path if File.exist? path
    end

    # add business_object_models paths
    repo_index.xpath("//business_object_model").each do |x|
      next if x['status'] == WITHDRAWN_STATUS

      path = Pathname.new("#{stepmod_dir}/business_object_models/#{x['name']}/business_object_model.xml")
      files << path if File.exist? path
    end

    # add application_protocols paths
    repo_index.xpath("//application_protocol").each do |x|
      next if x['status'] == WITHDRAWN_STATUS

      path = Pathname.new("#{stepmod_dir}/application_protocols/#{x['name']}/application_protocol.xml")
      files << path if File.exist? path
    end

    # sort! returns the receiver, so chaining uniq! is safe here; the return
    # value of uniq! (nil when nothing was removed) is deliberately unused.
    files.sort!.uniq!
    process_term_files(files)

    [
      general_concepts,
      resource_concepts,
      parsed_bibliography,
      part_concepts,
      part_resources,
      part_modules,
    ]
  end

  private

  # Parses each document in +files+ and fills the result collections.
  #
  # Documents are skipped when Bibdata cannot be built from them, when their
  # doctype is not one of ACCEPTED_STAGES, or when they have no part number.
  #
  # @param files [Array<Pathname>] XML documents to process
  # @return [void]
  def process_term_files(files)
    # schema name => path of the document that first referenced it
    parsed_schema_names = {}
    files.each do |file_path|
      file_path = file_path.realpath
      fpath = file_path.relative_path_from(stepmod_path)

      log "INFO: Processing XML file #{fpath}"
      current_document = Nokogiri::XML(File.read(file_path)).root

      bibdata = nil
      begin
        bibdata = Stepmod::Utils::Bibdata.new(document: current_document)
      rescue StandardError
        log "WARNING: Unknown file #{fpath}, skipped"
        next
      end

      unless ACCEPTED_STAGES.include? bibdata.doctype
        log "INFO: skipped #{bibdata.docid} as it is not " \
            "one of (#{ACCEPTED_STAGES.join(', ')})."
        next
      end

      if bibdata.part.to_s.empty?
        log "FATAL: missing `part` attribute: #{fpath}"
        log "INFO: skipped #{bibdata.docid} as it is missing `part` attribute."
        next
      end

      revision_string = "\n// CVS: revision not detected"
      if cvs_mode
        # Run `cvs status` to find out version

        log "INFO: Detecting CVS revision..."
        Dir.chdir(stepmod_path) do
          status = `cvs status #{fpath}`

          unless status.empty?
            working_rev = status.split(/\n/).grep(/Working revision:/)
              .first.match(/revision:\s+(.+)$/)[1]
            repo_rev = status.split(/\n/).grep(/Repository revision:/)
              .first.match(/revision:\t(.+)\t/)[1]
            log "INFO: CVS working rev (#{working_rev}), " \
                "repo rev (#{repo_rev})"
            revision_string = "\n// CVS working rev: (#{working_rev}), repo rev (#{repo_rev})\n" +
              "// CVS: revision #{working_rev == repo_rev ? 'up to date' : 'differs'}"
          end
        end
      end

      # read definitions
      current_part_concepts = Glossarist::Collection.new
      definition_index = 0
      current_document.xpath("//definition").each do |definition|
        definition_index += 1
        term_id = definition["id"]
        unless term_id.nil?
          # Duplicates are only reported; the definition is still processed.
          if encountered_terms[term_id]
            log "FATAL: Duplicated term with id: #{term_id}, #{fpath}"
          end
          encountered_terms[term_id] = true
        end

        # Assume that definition is located in clause 3 of the ISO document
        # in order. We really don't have a good reference here.
        ref_clause = "3.#{definition_index}"

        # NOTE(review): fpath is a Pathname, so `+` here is Pathname#+, which
        # joins revision_string as a path component (a "/" is inserted before
        # it). Left unchanged because the consumer of file_path is not visible
        # here -- confirm whether plain string concatenation was intended.
        concept = Stepmod::Utils::Concept.parse(
          definition,
          reference_anchor: bibdata.anchor,
          reference_clause: ref_clause,
          file_path: fpath + revision_string,
        )
        next unless concept

        if term_special_category(bibdata)
          # log "INFO: this part is special"
          find_or_initialize_concept(current_part_concepts, concept)
        else
          # log "INFO: this part is generic"
          find_or_initialize_concept(general_concepts, concept)
        end

        parsed_bibliography << bibdata
      end

      current_part_resources = Glossarist::Collection.new
      current_part_modules_arm = {}
      current_part_modules_mim = {}

      log "INFO: FILE PATH IS #{file_path}"
      case file_path.to_s
      when /resource.xml$/
        log "INFO: Processing resource.xml for #{file_path}"
        # Assumption: every schema is only linked by a single resource_docs document.
        current_document.xpath("//schema").each do |schema_node|
          schema_name = schema_node["name"]
          if parsed_schema_names[schema_name]
            log "ERROR: We have encountered this schema before: " \
                "#{schema_name} from path " \
                "#{parsed_schema_names[schema_name]}, now at #{file_path}"
            next
          else
            parsed_schema_names[schema_name] = file_path
          end

          Dir["#{stepmod_path}/resources/#{schema_name}/descriptions.xml"].each do |description_xml_path|
            log "INFO: Processing resources schema #{description_xml_path}"
            description_document = Nokogiri::XML(File.read(description_xml_path)).root
            description_document.xpath("//ext_description").each do |ext_description|
              # log "INFO: Processing linkend[#{ext_description['linkend']}]"

              concept = Stepmod::Utils::Concept.parse(
                ext_description,
                reference_anchor: bibdata.anchor,
                reference_clause: nil,
                file_path: Pathname.new(description_xml_path)
                                    .relative_path_from(stepmod_path),
              )
              next unless concept

              if term_special_category(bibdata)
                # log "INFO: this part is special"
                find_or_initialize_concept(current_part_resources, concept)
              else
                # log "INFO: this part is generic"
                find_or_initialize_concept(resource_concepts, concept)
              end

              parsed_bibliography << bibdata
            end
          end
        end

      when /module.xml$/
        log "INFO: Processing module.xml for #{file_path}"
        # Assumption: every schema is only linked by a single module document.
        # puts current_document.xpath('//module').length
        schema_name = current_document.xpath("//module").first["name"]
        if parsed_schema_names[schema_name]
          log "ERROR: We have encountered this schema before: " \
              "#{schema_name} from path #{parsed_schema_names[schema_name]}, " \
              "now at #{file_path}"
          # Skips the rest of this file, including the result bookkeeping below.
          next
        else
          parsed_schema_names[schema_name] = file_path
        end

        description_xml_path = "#{stepmod_path}/modules/#{schema_name}/arm_descriptions.xml"
        log "INFO: Processing modules schema #{description_xml_path}"

        if File.exist?(description_xml_path)
          description_document = Nokogiri::XML(File.read(description_xml_path)).root
          description_document.xpath("//ext_description").each do |ext_description|
            linkend_schema = ext_description["linkend"].split(".").first
            concept = Stepmod::Utils::Concept.parse(
              ext_description,
              reference_anchor: bibdata.anchor,
              reference_clause: nil,
              file_path: Pathname.new(description_xml_path)
                                  .relative_path_from(stepmod_path),
            )
            next unless concept

            current_part_modules_arm[linkend_schema] ||= Glossarist::Collection.new
            find_or_initialize_concept(
              current_part_modules_arm[linkend_schema], concept
            )
            # puts part_modules_arm.inspect
            parsed_bibliography << bibdata
          end
        end

        description_xml_path = "#{stepmod_path}/modules/#{schema_name}/mim_descriptions.xml"
        log "INFO: Processing modules schema #{description_xml_path}"

        if File.exist?(description_xml_path)
          description_document = Nokogiri::XML(File.read(description_xml_path)).root
          description_document.xpath("//ext_description").each do |ext_description|
            linkend_schema = ext_description["linkend"].split(".").first

            concept = Stepmod::Utils::Concept.parse(
              ext_description,
              reference_anchor: bibdata.anchor,
              reference_clause: nil,
              file_path: Pathname
                .new(description_xml_path)
                .relative_path_from(stepmod_path),
            )
            next unless concept

            current_part_modules_mim[linkend_schema] ||=
              Glossarist::Collection.new
            find_or_initialize_concept(
              current_part_modules_mim[linkend_schema], concept
            )

            parsed_bibliography << bibdata
          end
        end

      end

      log "INFO: Completed processing XML file #{fpath}"
      if current_part_concepts.to_a.empty?
        log "INFO: Skipping #{fpath} (#{bibdata.docid}) " \
            "because it contains no concepts."
      elsif current_part_concepts.to_a.length < 3
        log "INFO: Skipping #{fpath} (#{bibdata.docid}) " \
            "because it only has #{current_part_concepts.to_a.length} terms."

        # Parts with fewer than three terms are folded into the general set.
        current_part_concepts.to_a.each do |x|
          general_concepts.store(x)
        end
      else
        unless current_part_concepts.to_a.empty?
          part_concepts << [bibdata,
                            current_part_concepts]
        end
      end
      unless current_part_resources.to_a.empty?
        part_resources << [bibdata,
                           current_part_resources]
      end
      if (current_part_modules_arm.to_a.size +
          current_part_modules_mim.to_a.size).positive?
        part_modules << [bibdata, current_part_modules_arm,
                         current_part_modules_mim]
      end
    end
  end

  # Stores a new Glossarist::Concept with a random UUID in +collection+ and
  # attaches +localized_concept+ to it. No lookup of an existing concept is
  # performed, despite the method name.
  #
  # @param collection [Glossarist::Collection] collection receiving the concept
  # @param localized_concept [Object] localization to attach via #add_l10n
  # @return [Object] whatever #add_l10n returns
  def find_or_initialize_concept(collection, localized_concept)
    concept = collection
      .store(Glossarist::Concept.new(id: SecureRandom.uuid))
    concept.add_l10n(localized_concept)
  end
end
|
|
377
|
+
end
|
|
378
|
+
end
|
data/stepmod-utils.gemspec
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
require_relative
|
|
1
|
+
require_relative "lib/stepmod/utils/version"
|
|
2
2
|
|
|
3
3
|
Gem::Specification.new do |spec|
|
|
4
4
|
spec.name = "stepmod-utils"
|
|
@@ -22,16 +22,22 @@ Gem::Specification.new do |spec|
|
|
|
22
22
|
|
|
23
23
|
# Specify which files should be added to the gem when it is released.
|
|
24
24
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
|
25
|
-
spec.files
|
|
26
|
-
`git ls-files -z`.split("\x0").reject
|
|
25
|
+
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
|
26
|
+
`git ls-files -z`.split("\x0").reject do |f|
|
|
27
|
+
f.match(%r{^(test|spec|features)/})
|
|
28
|
+
end
|
|
27
29
|
end
|
|
28
30
|
spec.bindir = "exe"
|
|
29
31
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
30
32
|
spec.require_paths = ["lib"]
|
|
31
33
|
|
|
32
|
-
spec.add_runtime_dependency "thor", ">= 0.20.3"
|
|
33
|
-
spec.add_runtime_dependency "reverse_adoc", ">= 0.2.9"
|
|
34
|
-
spec.add_runtime_dependency "ptools", '~> 1.3'
|
|
35
34
|
spec.add_runtime_dependency "concurrent-ruby"
|
|
35
|
+
spec.add_runtime_dependency "glossarist", "~> 0.1.0"
|
|
36
|
+
spec.add_runtime_dependency "ptools"
|
|
37
|
+
spec.add_runtime_dependency "reverse_adoc", ">= 0.2.9"
|
|
38
|
+
spec.add_runtime_dependency "thor", ">= 0.20.3"
|
|
36
39
|
spec.add_development_dependency "byebug", "~> 11.1"
|
|
40
|
+
spec.add_development_dependency "rubocop", "1.12"
|
|
41
|
+
spec.add_development_dependency "rubocop-performance"
|
|
42
|
+
spec.add_development_dependency "rubocop-rails"
|
|
37
43
|
end
|