iev 0.3.1 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -1
- data/.github/workflows/release.yml +25 -0
- data/.gitignore +4 -0
- data/.rubocop.yml +0 -2
- data/README.adoc +4 -4
- data/exe/iev-glossarist +21 -0
- data/iev.gemspec +12 -3
- data/lib/iev/cli/command.rb +109 -0
- data/lib/iev/cli/command_helper.rb +83 -0
- data/lib/iev/cli/ui.rb +70 -0
- data/lib/iev/cli.rb +22 -0
- data/lib/iev/converter/mathml_to_asciimath.rb +197 -0
- data/lib/iev/converter.rb +9 -0
- data/lib/iev/data_conversions.rb +39 -0
- data/lib/iev/db.rb +3 -3
- data/lib/iev/db_cache.rb +2 -2
- data/lib/iev/db_writer.rb +81 -0
- data/lib/iev/iso_639_2.yaml +4075 -0
- data/lib/iev/iso_639_code.rb +47 -0
- data/lib/iev/profiler.rb +69 -0
- data/lib/iev/relaton_db.rb +63 -0
- data/lib/iev/source_parser.rb +350 -0
- data/lib/iev/supersession_parser.rb +70 -0
- data/lib/iev/term_attrs_parser.rb +143 -0
- data/lib/iev/term_builder.rb +313 -0
- data/lib/iev/utilities.rb +58 -0
- data/lib/iev/version.rb +2 -2
- data/lib/iev.rb +24 -2
- metadata +153 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a5909046a19d741d4242dd6823366923efb0607de3ebf89fdcb99465310b6150
|
4
|
+
data.tar.gz: 55812547af56e58d9ea68a1cc7413632558c51a8d3b0d22b48ee61e81afef390
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6e5cca48906d1f65292cc58887b89564ae293da6cd081e47e70495d11f43c606b6db11d7d99db92210909b4fc3ccfcb54ffdb003191c17134f15c921cbded6c0
|
7
|
+
data.tar.gz: 02f500f0bf2c1da875402a5f26f709d86ee7a4924fd4dd61796e653e260cd52d6c88fc37c9403bf4db479bdeb59664bc2cd599eaa00f40012ca0893a35a96164
|
data/.github/workflows/rake.yml
CHANGED
data/.github/workflows/release.yml
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Auto-generated by Cimas: Do not edit it manually!
|
2
|
+
# See https://github.com/metanorma/cimas
|
3
|
+
name: release
|
4
|
+
|
5
|
+
on:
|
6
|
+
workflow_dispatch:
|
7
|
+
inputs:
|
8
|
+
next_version:
|
9
|
+
description: |
|
10
|
+
Next release version. Possible values: x.y.z, major, minor, patch (or pre|rc|etc).
|
11
|
+
Also, you can pass 'skip' to skip 'git tag' and do 'gem push' for the current version
|
12
|
+
required: true
|
13
|
+
default: 'skip'
|
14
|
+
repository_dispatch:
|
15
|
+
types: [ do-release ]
|
16
|
+
|
17
|
+
jobs:
|
18
|
+
release:
|
19
|
+
uses: metanorma/ci/.github/workflows/rubygems-release.yml@main
|
20
|
+
with:
|
21
|
+
next_version: ${{ github.event.inputs.next_version }}
|
22
|
+
secrets:
|
23
|
+
rubygems-api-key: ${{ secrets.METANORMA_CI_RUBYGEMS_API_KEY }}
|
24
|
+
pat_token: ${{ secrets.METANORMA_CI_PAT_TOKEN }}
|
25
|
+
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/README.adoc
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
=
|
1
|
+
= IEV
|
2
2
|
|
3
3
|
image:https://img.shields.io/gem/v/iev.svg["Gem Version", link="https://rubygems.org/gems/iev"]
|
4
4
|
image:https://github.com/metanorma/iev/workflows/rake/badge.svg["Build Status", link="https://github.com/metanorma/iev/actions?query=workflow%3Arake"]
|
@@ -31,15 +31,15 @@ Or install it yourself as:
|
|
31
31
|
----
|
32
32
|
# Get term
|
33
33
|
|
34
|
-
|
34
|
+
IEV.get("103-01-02", "en")
|
35
35
|
=> "functional"
|
36
36
|
|
37
37
|
# If code not found
|
38
|
-
|
38
|
+
IEV.get("111-11-11", "en")
|
39
39
|
=> ""
|
40
40
|
|
41
41
|
# If language not found
|
42
|
-
|
42
|
+
IEV.get("103-01-02", "eee")
|
43
43
|
=> nil
|
44
44
|
----
|
45
45
|
|
data/exe/iev-glossarist
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/usr/bin/env ruby
# encoding: UTF-8
#
# Command-line entry point: prepares the load path, then hands the
# command-line arguments over to IEV::CLI.

require "pathname"

# Real location of this executable, with symlinks resolved, so that the lib
# directory can be located relative to it.
bin_file = Pathname.new(__FILE__).realpath

# Put the lib directory that sits next to this executable first on the load
# path.
$LOAD_PATH.unshift File.expand_path("../../lib", bin_file)

# Fixes https://github.com/rubygems/rubygems/issues/1420
require "rubygems/specification"

class Gem::Specification
  def this
    self
  end
end

require "iev"
require "iev/cli"

IEV::CLI.start(ARGV)
|
data/iev.gemspec
CHANGED
@@ -4,7 +4,7 @@ require "iev/version"
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "iev"
|
7
|
-
spec.version =
|
7
|
+
spec.version = IEV::VERSION
|
8
8
|
spec.authors = ["Ribose Inc."]
|
9
9
|
spec.email = ["open.source@ribose.com"]
|
10
10
|
|
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.bindir = "exe"
|
20
20
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
21
21
|
spec.require_paths = ["lib"]
|
22
|
-
spec.required_ruby_version = Gem::Requirement.new(">=
|
22
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 3.1.0")
|
23
23
|
|
24
24
|
spec.add_development_dependency "bundler"
|
25
25
|
spec.add_development_dependency "debug"
|
@@ -27,5 +27,14 @@ Gem::Specification.new do |spec|
|
|
27
27
|
spec.add_development_dependency "rspec", "~> 3.0"
|
28
28
|
spec.add_development_dependency "simplecov"
|
29
29
|
|
30
|
-
spec.add_dependency "
|
30
|
+
spec.add_dependency "creek", "~> 2.5"
|
31
|
+
spec.add_dependency "glossarist", "~> 2.0.6"
|
32
|
+
spec.add_dependency "unitsml"
|
33
|
+
spec.add_dependency "plurimath"
|
34
|
+
spec.add_dependency "nokogiri", ">= 1.16.5"
|
35
|
+
spec.add_dependency "relaton", "~> 1.18"
|
36
|
+
spec.add_dependency "sequel", "~> 5.40"
|
37
|
+
spec.add_dependency "sqlite3", "~> 1.7.0"
|
38
|
+
spec.add_dependency "thor", "~> 1.0"
|
39
|
+
spec.add_dependency "zeitwerk", "~> 2.4"
|
31
40
|
end
|
data/lib/iev/cli/command.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# (c) Copyright 2020 Ribose Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
module IEV
  module CLI
    # Thor command set with three commands: xlsx2yaml, xlsx2db and db2yaml.
    # The heavy lifting lives in CommandHelper; each command only wires the
    # helper steps together.
    class Command < Thor
      include CommandHelper

      desc "xlsx2yaml FILE", "Converts Excel IEV exports to YAMLs."
      # Imports the spreadsheet into an in-memory SQLite database, applies
      # the --only-* filters and writes the resulting concepts to files.
      def xlsx2yaml(file)
        handle_generic_options(options)

        memory_db = Sequel.sqlite
        DbWriter.new(memory_db).import_spreadsheet(file)

        rows = filter_dataset(memory_db, options)
        concepts = build_collection_from_dataset(rows)
        save_collection_to_files(concepts, options[:output])
        summary
      end

      desc "xlsx2db FILE", "Imports Excel to SQLite database."
      # Imports the spreadsheet and stores the result in the --output file.
      def xlsx2db(file)
        handle_generic_options(options)

        # Instantiating an in-memory db and dumping it later is faster than
        # just working on file db.
        memory_db = Sequel.sqlite
        DbWriter.new(memory_db).import_spreadsheet(file)

        save_db_to_file(memory_db, options[:output])
        summary
      end

      desc "db2yaml DB_FILE", "Exports SQLite to IEV YAMLs."
      # Reads concepts from an existing SQLite file, applies the --only-*
      # filters and writes the resulting concepts to files.
      def db2yaml(dbfile)
        handle_generic_options(options)

        file_db = Sequel.sqlite(dbfile)

        rows = filter_dataset(file_db, options)
        concepts = build_collection_from_dataset(rows)
        save_collection_to_files(concepts, options[:output])
        summary
      end

      def self.exit_on_failure?
        true
      end

      # Options must be declared at the bottom because Thor must have commands
      # defined in advance.

      # Declares the same option for one command or for a list of commands.
      # Everything except +methods+ is passed on to Thor's +option+.
      def self.shared_option(name, methods:, **kwargs)
        Array(methods).each do |command_name|
          option(name, for: command_name, **kwargs)
        end
      end

      yaml_commands = %i[xlsx2yaml db2yaml]
      all_commands = %i[xlsx2yaml xlsx2db db2yaml]

      shared_option :only_concepts,
        desc: "Only process concepts with IEVREF matching this argument, " \
              "'%' and '_' wildcards are supported and have meaning as in SQL " \
              "LIKE operator",
        methods: yaml_commands

      shared_option :only_languages,
        desc: "Only export these languages, skip concepts which aren't " \
              "translated to any of them (comma-separated list, language " \
              "codes must be as in spreadsheet)",
        methods: yaml_commands

      shared_option :output,
        desc: "Output directory",
        aliases: :o,
        default: Dir.pwd,
        methods: yaml_commands

      shared_option :output,
        desc: "Output file",
        aliases: :o,
        default: File.join(Dir.pwd, "concepts.sqlite3"),
        methods: :xlsx2db

      shared_option :progress,
        type: :boolean,
        desc: "Enables or disables progress indicator. By default disabled " \
              "when 'CI' environment variable is set and enabled otherwise",
        methods: all_commands

      shared_option :debug_term_attributes,
        desc: "Enables debug messages about term attributes recognition",
        type: :boolean,
        default: false,
        methods: yaml_commands

      shared_option :debug_sources,
        desc: "Enables debug messages about authoritative sources recognition",
        type: :boolean,
        default: false,
        methods: yaml_commands

      shared_option :debug_relaton,
        desc: "Enables debug messages about Relaton integration",
        type: :boolean,
        default: false,
        methods: yaml_commands

      shared_option :profile,
        desc: "Generates profiler reports for this program, requires ruby-prof",
        type: :boolean,
        default: false,
        methods: all_commands
    end
  end
end
|
data/lib/iev/cli/command_helper.rb
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# (c) Copyright 2020 Ribose Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
module IEV
  module CLI
    # Building blocks shared by the CLI commands: option handling, dataset
    # filtering, concept collection building and output writing.
    module CommandHelper
      include CLI::UI

      protected

      # Writes +collection+ into the "concepts" directory below +output_dir+,
      # creating that directory when it does not exist yet.
      def save_collection_to_files(collection, output_dir)
        Profiler.measure("writing-yamls") do
          info "Writing concepts to files..."
          path = File.expand_path("./concepts", output_dir)
          FileUtils.mkdir_p(path)
          collection.save_to_files(path)
        end
      end

      # Copies the "main" database of +src_db+ into the SQLite file +dbfile+
      # using SQLite's backup API.
      #
      # The backup is always finished and the destination connection is always
      # closed, also when copying fails, so no handle on +dbfile+ is left
      # open.
      #
      # Note: Implementation examples here:
      # https://www.rubydoc.info/github/luislavena/sqlite3-ruby/SQLite3/Backup
      #
      # @return [nil]
      def save_db_to_file(src_db, dbfile)
        info "Saving database to a file..."
        src_db.synchronize do |src_conn|
          dest_conn = SQLite3::Database.new(dbfile)
          begin
            backup = SQLite3::Backup.new(dest_conn, "main", src_conn, "main")
            begin
              # A negative page count copies all remaining pages in one step.
              backup.step(-1)
            ensure
              backup.finish
            end
          ensure
            dest_conn.close
          end
          nil
        end
      end

      # Prints the closing message of a command.
      def summary
        info "Done!"
      end

      # Path inside +output_dir+ named after +file+, with the extension
      # replaced by ".yaml". +output_dir+ may be a Pathname or a String.
      #
      # @return [Pathname]
      def collection_file_path(file, output_dir)
        Pathname(output_dir).join(Pathname.new(file).basename.sub_ext(".yaml"))
      end

      # Handles various generic options, e.g. detailed debug switches.
      # Assigns some global variables accordingly, so these settings are
      # available throughout the program:
      #
      # * $IEV_PROFILE  - value of the :profile option
      # * $IEV_PROGRESS - value of the :progress option; when the option is
      #   absent, true unless the CI environment variable is set
      # * $IEV_DEBUG    - hash of all "debug_*" options, keyed by the option
      #   name without that prefix, as symbols
      def handle_generic_options(options)
        $IEV_PROFILE = options[:profile]
        $IEV_PROGRESS = options.fetch(:progress, !ENV["CI"])

        $IEV_DEBUG = options.to_h.
          select { |key, _| key.to_s.start_with?("debug_") }.
          transform_keys { |key| key.to_s.delete_prefix("debug_").to_sym }
      end

      # Narrows the concepts table of +db+ according to the :only_concepts
      # (case-insensitive LIKE pattern on ievref) and :only_languages
      # (comma-separated list) options. Whitespace around the listed language
      # codes is ignored, so "en, fr" selects the same rows as "en,fr".
      #
      # @return the (possibly filtered) dataset
      def filter_dataset(db, options)
        query = db[:concepts]

        if options[:only_concepts]
          query = query.where(Sequel.ilike(:ievref, options[:only_concepts]))
        end

        if options[:only_languages]
          languages = options[:only_languages].split(",").map(&:strip)
          query = query.where(language: languages)
        end

        query
      end

      # Builds a Glossarist::ManagedConceptCollection from +dataset+: every
      # row is turned into a term by TermBuilder, and each term is added as a
      # localization of the concept with the same id. Rows for which
      # TermBuilder returns nothing are skipped.
      def build_collection_from_dataset(dataset)
        Profiler.measure("building-collection") do
          Glossarist::ManagedConceptCollection.new.tap do |concept_collection|
            dataset.each do |row|
              term = TermBuilder.build_from(row)
              next unless term

              concept = concept_collection.fetch_or_initialize(term.id)
              concept.add_l10n(term)
            end
          end
        end
      end
    end
  end
end
|
data/lib/iev/cli/ui.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# (c) Copyright 2020 Ribose Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
module IEV
  module CLI
    # Console output helpers. All output goes through Kernel#print.
    #
    # Behaviour is driven by two globals:
    # * $IEV_PROGRESS - when truthy, progress messages are shown and every
    #   other message first wipes the current progress line
    # * $IEV_DEBUG    - hash of debug topics (symbol => boolean); may be unset,
    #   which is treated the same as an empty hash
    #
    # @todo
    #   Make it thread-safe. Currently, calling UI methods from different
    #   threads may result with mangled output. At first glance it seems like
    #   something is wrong with carriage returns, but more research is needed.
    module UI
      module_function

      # Prints a debug message. When the first argument is a Symbol it is
      # taken as the topic and the message is printed only if that topic is
      # enabled in $IEV_DEBUG; without a topic the message is always printed.
      def debug(*args)
        Helper.cli_out(:debug, *args)
      end

      # Prints a warning. Warnings are printed regardless of topic settings;
      # a leading Symbol argument is still consumed as the topic.
      def warn(*args)
        Helper.cli_out(:warn, *args)
      end

      # Prints progress message which will be replaced on next call.
      # Does nothing unless $IEV_PROGRESS is truthy.
      def progress(message)
        return unless $IEV_PROGRESS

        print "#{Helper.clear_progress}#{message} "
      end

      # Prints generic message.
      def info(message)
        print "#{Helper.clear_progress}#{message}\n"
      end

      # Sets an UI tag which will be prepended to messages printed with
      # #debug and #warn. The tag is stored per thread.
      def set_ui_tag(str)
        Thread.current[:iev_ui_tag] = str
      end

      # Implementation details of UI.
      module Helper
        module_function

        # Control sequence that blanks the current progress line (carriage
        # return, 40 spaces, carriage return), or an empty string when
        # progress display is off.
        def clear_progress
          $IEV_PROGRESS ? "\r#{' ' * 40}\r" : ""
        end

        # Prints +args+ joined with spaces as a single line, prefixed with
        # the current thread's UI tag when one is set. A leading Symbol in
        # +args+ is the topic and is not printed.
        def cli_out(level, *args)
          topic = args.shift if args.first.is_a?(Symbol)
          # Decide first, so suppressed messages are never formatted.
          return unless should_out?(level, topic)

          message = args.map(&:to_s).join(" ").chomp
          ui_tag = Thread.current[:iev_ui_tag]

          print [
            clear_progress,
            ui_tag,
            ui_tag && ": ",
            message,
            "\n",
          ].join
        end

        # Whether a message of +level+ about +topic+ should be printed:
        # always when there is no topic or the level is :warn, otherwise
        # only when the topic is enabled in $IEV_DEBUG. An unset $IEV_DEBUG
        # means that no topic is enabled.
        def should_out?(level, topic)
          return true if topic.nil? || level == :warn

          ($IEV_DEBUG || {})[topic]
        end
      end
    end
  end
end
|
data/lib/iev/cli.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# (c) Copyright 2020 Ribose Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
module IEV
  module CLI
    # Entry point of the command-line interface.
    #
    # Installs handlers for SIGINT and SIGTERM which print a short notice and
    # exit with status 1, then passes +arguments+ to Command.start and
    # returns its result.
    def self.start(arguments)
      notices = {
        "INT" => "Signal SIGINT received, quitting!",
        "TERM" => "Signal SIGTERM received, quitting!",
      }

      notices.each do |signal_name, notice|
        Signal.trap(signal_name) do
          UI.info notice
          Kernel.exit(1)
        end
      end

      IEV::CLI::Command.start(arguments)
    end
  end
end
|
data/lib/iev/converter/mathml_to_asciimath.rb
ADDED
@@ -0,0 +1,197 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module IEV
  module Converter
    # Rewrites MathML formulas and a handful of HTML inline elements found in
    # a string into AsciiDoc-style text: formulas become "stem:[...]",
    # subscripts "~...~", superscripts "^...^", list items ". " / "* " lines
    # and sans-serif <font> runs backtick-quoted text.
    class MathmlToAsciimath
      using DataConversions

      # Greek characters and the AsciiMath names they are replaced with.
      # NOTE(review): the method names below speak of HTML entities, while
      # the patterns are literal characters here — confirm against the
      # repository that these are not meant to be "&alpha;"-style entities.
      GREEK_TO_ASCIIMATH = {
        "α" => "alpha",
        "β" => "beta",
        "γ" => "gamma",
        "Γ" => "Gamma",
        "δ" => "delta",
        "Δ" => "Delta",
        "ε" => "epsilon",
        "ϵ" => "varepsilon",
        "ζ" => "zeta",
        "η" => "eta",
        "θ" => "theta",
        "Θ" => "Theta",
        "ϑ" => "vartheta",
        "ι" => "iota",
        "κ" => "kappa",
        "λ" => "lambda",
        "Λ" => "Lambda",
        "μ" => "mu",
        "ν" => "nu",
        "ξ" => "xi",
        "Ξ" => "Xi",
        "π" => "pi",
        "Π" => "Pi",
        "ρ" => "rho",
        "σ" => "sigma",
        "Σ" => "Sigma",
        "τ" => "tau",
        "υ" => "upsilon",
        "φ" => "phi",
        "Φ" => "Phi",
        "ϕ" => "varphi",
        "χ" => "chi",
        "ψ" => "psi",
        "Ψ" => "Psi",
        "ω" => "omega",
      }.freeze

      # Matches any single character listed in GREEK_TO_ASCIIMATH.
      GREEK_PATTERN = Regexp.union(GREEK_TO_ASCIIMATH.keys).freeze

      # Marker placed on both sides of the text of these inline elements.
      SCRIPT_MARKERS = { "sub" => "~", "sup" => "^" }.freeze

      # Prefix placed in front of the items of these list elements.
      LIST_BULLETS = { "ol" => ".", "ul" => "*" }.freeze

      private_constant :GREEK_TO_ASCIIMATH, :GREEK_PATTERN,
                       :SCRIPT_MARKERS, :LIST_BULLETS

      # Shorthand for +new.convert(input)+.
      def self.convert(input)
        new.convert(input)
      end

      # Converts +input+; nil and strings containing neither "<" nor "&"
      # are returned as they are.
      def convert(input)
        mathml_to_asciimath(input)
      end

      private

      # Replaces every <math> element with "stem:[...]" (or drops it when
      # the formula converts to an empty string), then runs the HTML
      # conversion on the result.
      def mathml_to_asciimath(input)
        # If given string does not include '<' (for elements) nor '&'
        # (for entities), then it's certain that it doesn't contain
        # any MathML or HTML formula.
        return input unless input&.match?(/<|&/)
        return html_to_asciimath(input) unless input.include?("<math>")

        fragment = Nokogiri::HTML.fragment(input, "UTF-8")

        fragment.css("math").each do |math_element|
          asciimath = Plurimath::Math.parse(
            text_to_asciimath(math_element.to_xml), :mathml
          ).to_asciimath.strip

          if asciimath.empty?
            math_element.remove
          else
            math_element.replace "stem:[#{asciimath}]"
          end
        end

        html_to_asciimath(fragment.children.to_s)
      end

      # Rewrites <i>, <sub>, <sup>, list and sans-serif <font> elements
      # into their text equivalents, in that order, and finally wraps the
      # remaining Greek characters in "stem:[...]".
      def html_to_asciimath(input)
        return input if input.nil? || input.empty?

        fragment = Nokogiri::HTML.fragment(input, "UTF-8")

        # Italic runs of up to 12 characters are treated as formulas,
        # longer ones as emphasised text.
        fragment.css("i").each do |node|
          decoded = text_to_asciimath(node.text)

          if decoded.empty?
            node.remove
          elsif decoded.length <= 12
            node.replace "stem:[#{decoded}]"
          else
            node.replace "_#{decoded}_"
          end
        end

        SCRIPT_MARKERS.each do |tag, marker|
          fragment.css(tag).each do |node|
            if node.text.empty?
              node.remove
            else
              node.replace "#{marker}#{text_to_asciimath(node.text)}#{marker}"
            end
          end
        end

        LIST_BULLETS.each do |list_tag, bullet|
          fragment.css(list_tag).each do |list|
            list.css("li").each { |item| item.replace "#{bullet} #{item.text}" }
          end
        end

        # Replace sans-serif font with monospace
        fragment.css('font[style*="sans-serif"]').each do |font|
          font.replace "`#{font.text}`"
        end

        html_entities_to_stem(
          fragment.children.to_s.
            # Merge directly adjacent stem blocks into one.
            gsub(/\]stem:\[/, "").
            # Items were rewritten above; drop the list wrappers themselves.
            gsub(%r{</?[uo]l>}, ""),
        )
      end

      # Decodes HTML entities in +text+, then spells out Greek characters.
      def text_to_asciimath(text)
        html_entities_to_asciimath(text.decode_html)
      end

      # Replaces each Greek character with its AsciiMath name.
      def html_entities_to_asciimath(input)
        input.gsub(GREEK_PATTERN, GREEK_TO_ASCIIMATH)
      end

      # Replaces each Greek character with its AsciiMath name wrapped in
      # "stem:[...]".
      def html_entities_to_stem(input)
        input.gsub(GREEK_PATTERN) do |letter|
          "stem:[#{GREEK_TO_ASCIIMATH[letter]}]"
        end
      end
    end
  end
end
|
data/lib/iev/data_conversions.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# (c) Copyright 2020 Ribose Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
module IEV
  # Refinements of String used while converting spreadsheet data.
  # Activate them with `using IEV::DataConversions`.
  module DataConversions
    refine String do
      # Replaces the receiver's content with its HTML-decoded form.
      #
      # @return [nil]
      def decode_html!
        decoded = decode_html
        replace(decoded)
        nil
      end

      # @return the receiver decoded by an HTMLEntities coder created with
      #   the :expanded flavour
      def decode_html
        coder = HTMLEntities.new(:expanded)
        coder.decode(self)
      end

      # Normalize various encoding anomalies like `\uFEFF` in strings
      #
      # Works in place: applies Unicode normalization, removes U+FEFF,
      # turns U+2011 into "-", turns U+00A0 and U+2000..U+2006 into a plain
      # space, and finally strips the string.
      #
      # @return [nil]
      def sanitize!
        unicode_normalize!

        substitutions = [
          ["\uFEFF", ""],
          ["\u2011", "-"],
          ["\u00a0", " "],
          [/[\u2000-\u2006]/, " "],
        ]
        substitutions.each do |pattern, replacement|
          gsub!(pattern, replacement)
        end

        strip!
        nil
      end

      # Non-destructive counterpart of sanitize!.
      #
      # @see sanitize!
      # @return [String] a sanitized copy; the receiver is left untouched
      def sanitize
        copy = dup
        copy.sanitize!
        copy
      end

      # @return the first element of what IEV::Iso639Code.three_char_code
      #   yields for the receiver
      def to_three_char_code
        codes = IEV::Iso639Code.three_char_code(self)
        codes.first
      end
    end
  end
end
|