obp-access 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +1 -15
- data/README.adoc +1 -1
- data/lib/obp/access/elements/terminology/base.rb +2 -4
- data/lib/obp/access/elements/terminology/tig.rb +6 -2
- data/lib/obp/access/elements/terminology/tig_admitted.rb +4 -6
- data/lib/obp/access/elements/terminology/tig_deprecated.rb +4 -12
- data/lib/obp/access/elements/terminology/tig_preferred.rb +4 -6
- data/lib/obp/access/grammar_parser.rb +22 -12
- data/lib/obp/access/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bb730bc50a9d42e2fab9002aa77d4bfd9c495324d8d7e3a2dc5ea88eacbcedc9
|
|
4
|
+
data.tar.gz: 8be58734fa23ab768dfa1bbe2d4907b738ef602f8bfeadc9b0d5293ba22ccce9
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6b159d5f9b615f94cadaa30c0bbb75cedc4a1761abf712cd1160fb227fbdc8dc7c2413fded7002b2132d5aedf2284081b17b34bf3055e7beab3688bf6e6767bd
|
|
7
|
+
data.tar.gz: 3c1fc77d1d229be553bc0b694eb16688dac4e2d0099fe0162bba4a51aa1de4c4a8978c8c6e4221bbae5d96f50771384be9efbec2e89f74cc6a0ad40f0b5403e8
|
data/.rubocop_todo.yml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on 2026-
|
|
3
|
+
# on 2026-06-13 11:32:12 UTC using RuboCop version 1.86.1.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
@@ -11,23 +11,9 @@ Gemspec/RequiredRubyVersion:
|
|
|
11
11
|
Exclude:
|
|
12
12
|
- 'obp-access.gemspec'
|
|
13
13
|
|
|
14
|
-
# Offense count: 1
|
|
15
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
16
|
-
# Configuration parameters: AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
|
|
17
|
-
# URISchemes: http, https
|
|
18
|
-
Layout/LineLength:
|
|
19
|
-
Max: 124
|
|
20
|
-
|
|
21
14
|
# Offense count: 3
|
|
22
15
|
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
|
|
23
16
|
Metrics/AbcSize:
|
|
24
17
|
Exclude:
|
|
25
18
|
- 'lib/obp/access/elements/bibliography/bib_ref.rb'
|
|
26
19
|
- 'lib/obp/access/elements/root.rb'
|
|
27
|
-
|
|
28
|
-
# Offense count: 1
|
|
29
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
30
|
-
# Configuration parameters: AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
|
|
31
|
-
# URISchemes: http, https
|
|
32
|
-
Layout/LineLength:
|
|
33
|
-
Max: 124
|
data/README.adoc
CHANGED
|
@@ -82,7 +82,7 @@ $ obp-access fetch -l fr,de -o output/ iso:std:iso:5598:ed-3:v1:en
|
|
|
82
82
|
The output is NISO STS XML with TBX-Basic terminology markup:
|
|
83
83
|
|
|
84
84
|
* Terms use `<tbx:termEntry>` with `<tbx:langSet>` per language
|
|
85
|
-
* Grammar is encoded via `<tbx:
|
|
85
|
+
* Grammar is encoded via `<tbx:grammaticalGender>` (values: masculine/feminine/neuter) and `<tbx:partOfSpeech>`
|
|
86
86
|
* Domains are extracted as `<tbx:subjectField>`
|
|
87
87
|
* Deprecated terms use `<tbx:normativeAuthorization value="deprecatedTerm"/>`
|
|
88
88
|
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Obp
|
|
2
4
|
class Access
|
|
3
5
|
class Renderer
|
|
@@ -15,10 +17,6 @@ module Obp
|
|
|
15
17
|
def path_suffix
|
|
16
18
|
"/tbx:termEntry/tbx:langSet"
|
|
17
19
|
end
|
|
18
|
-
|
|
19
|
-
def bold_term?(node)
|
|
20
|
-
node.inner_html.start_with?("<b>") || node.inner_html.include?("<b>")
|
|
21
|
-
end
|
|
22
20
|
end
|
|
23
21
|
end
|
|
24
22
|
end
|
|
@@ -1,9 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Obp
|
|
2
4
|
class Access
|
|
3
5
|
class Renderer
|
|
4
6
|
class Elements
|
|
5
7
|
class Terminology
|
|
6
8
|
class Tig < Base
|
|
9
|
+
NORMATIVE_AUTHORIZATION = "preferredTerm"
|
|
10
|
+
|
|
7
11
|
def self.classes
|
|
8
12
|
%w[sts-tbx-term]
|
|
9
13
|
end
|
|
@@ -19,7 +23,7 @@ module Obp
|
|
|
19
23
|
end
|
|
20
24
|
|
|
21
25
|
def normative_authorization
|
|
22
|
-
|
|
26
|
+
self.class::NORMATIVE_AUTHORIZATION
|
|
23
27
|
end
|
|
24
28
|
|
|
25
29
|
def content
|
|
@@ -46,7 +50,7 @@ module Obp
|
|
|
46
50
|
return unless genders.any?
|
|
47
51
|
|
|
48
52
|
genders.each do |gender|
|
|
49
|
-
xml.public_send(:"tbx:
|
|
53
|
+
xml.public_send(:"tbx:grammaticalGender", value: gender)
|
|
50
54
|
end
|
|
51
55
|
end
|
|
52
56
|
end
|
|
@@ -1,18 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Obp
|
|
2
4
|
class Access
|
|
3
5
|
class Renderer
|
|
4
6
|
class Elements
|
|
5
7
|
class Terminology
|
|
6
8
|
class TigAdmitted < Tig
|
|
9
|
+
NORMATIVE_AUTHORIZATION = "admittedTerm"
|
|
10
|
+
|
|
7
11
|
def self.classes
|
|
8
12
|
%w[sts-tbx-term admittedTerm]
|
|
9
13
|
end
|
|
10
|
-
|
|
11
|
-
private
|
|
12
|
-
|
|
13
|
-
def normative_authorization
|
|
14
|
-
"admittedTerm"
|
|
15
|
-
end
|
|
16
14
|
end
|
|
17
15
|
end
|
|
18
16
|
end
|
|
@@ -1,27 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Obp
|
|
2
4
|
class Access
|
|
3
5
|
class Renderer
|
|
4
6
|
class Elements
|
|
5
7
|
class Terminology
|
|
6
8
|
class TigDeprecated < Tig
|
|
9
|
+
NORMATIVE_AUTHORIZATION = "deprecatedTerm"
|
|
10
|
+
|
|
7
11
|
def self.classes
|
|
8
12
|
%w[sts-tbx-term deprecatedTerm]
|
|
9
13
|
end
|
|
10
14
|
|
|
11
15
|
private
|
|
12
16
|
|
|
13
|
-
def normative_authorization
|
|
14
|
-
"deprecatedTerm"
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
def content
|
|
18
|
-
Nokogiri::XML::Builder.new do |xml|
|
|
19
|
-
xml.public_send(:"tbx:tig", id: "term_#{id}-#{index}") do
|
|
20
|
-
render_tig_content(xml)
|
|
21
|
-
end
|
|
22
|
-
end
|
|
23
|
-
end
|
|
24
|
-
|
|
25
17
|
def parsed_html
|
|
26
18
|
strip_deprecation_label(node.inner_html)
|
|
27
19
|
end
|
|
@@ -1,18 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Obp
|
|
2
4
|
class Access
|
|
3
5
|
class Renderer
|
|
4
6
|
class Elements
|
|
5
7
|
class Terminology
|
|
6
8
|
class TigPreferred < Tig
|
|
9
|
+
NORMATIVE_AUTHORIZATION = "preferredTerm"
|
|
10
|
+
|
|
7
11
|
def self.classes
|
|
8
12
|
%w[sts-tbx-term preferredTerm]
|
|
9
13
|
end
|
|
10
|
-
|
|
11
|
-
private
|
|
12
|
-
|
|
13
|
-
def normative_authorization
|
|
14
|
-
"preferredTerm"
|
|
15
|
-
end
|
|
16
14
|
end
|
|
17
15
|
end
|
|
18
16
|
end
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Obp
|
|
2
4
|
class Access
|
|
3
5
|
class GrammarParser
|
|
@@ -9,18 +11,22 @@ module Obp
|
|
|
9
11
|
"verb" => "verb",
|
|
10
12
|
}.freeze
|
|
11
13
|
|
|
12
|
-
|
|
14
|
+
GENDER_MAP = {
|
|
15
|
+
"m" => "masculine",
|
|
16
|
+
"f" => "feminine",
|
|
17
|
+
"n" => "neuter",
|
|
18
|
+
}.freeze
|
|
13
19
|
|
|
14
20
|
BOLD_PATTERNS = [
|
|
15
21
|
[->(t) { POS_MAP.key?(t) }, :handle_pos_marker],
|
|
16
|
-
[->(t) {
|
|
22
|
+
[->(t) { GENDER_MAP.key?(t) }, :handle_gender_marker],
|
|
17
23
|
[->(t) { t.match?(/\A[mfn],\z/) }, :handle_gender_with_comma],
|
|
18
24
|
[->(t) { t.match?(/\A[mfn][,\s]+[mfn]([,\s]+[mfn])*\z/) }, :handle_multi_gender],
|
|
19
25
|
[->(t) { t == "," }, :handle_comma],
|
|
20
|
-
[->(t) { t == "〈" },
|
|
21
|
-
[->(t) { t == "〉" },
|
|
22
|
-
[->(t) { t.match?(/\A[mfn]\s+/) },
|
|
23
|
-
[->(t) { t.match?(/,.+[mfn]\z/) },
|
|
26
|
+
[->(t) { t == "〈" }, :handle_enter_bracket],
|
|
27
|
+
[->(t) { t == "〉" }, :handle_exit_bracket],
|
|
28
|
+
[->(t) { t.match?(/\A[mfn]\s+/) }, :handle_gender_qualifier],
|
|
29
|
+
[->(t) { t.match?(/,.+[mfn]\z/) }, :handle_term_with_gender],
|
|
24
30
|
].freeze
|
|
25
31
|
|
|
26
32
|
def self.parse(inner_html)
|
|
@@ -60,15 +66,15 @@ module Obp
|
|
|
60
66
|
end
|
|
61
67
|
|
|
62
68
|
def handle_gender_marker(text, state)
|
|
63
|
-
state
|
|
69
|
+
add_gender(state, text.strip)
|
|
64
70
|
end
|
|
65
71
|
|
|
66
72
|
def handle_gender_with_comma(text, state)
|
|
67
|
-
state
|
|
73
|
+
add_gender(state, text.strip[0])
|
|
68
74
|
end
|
|
69
75
|
|
|
70
76
|
def handle_multi_gender(text, state)
|
|
71
|
-
text.strip.scan(/[mfn]/).each { |
|
|
77
|
+
text.strip.scan(/[mfn]/).each { |code| add_gender(state, code) }
|
|
72
78
|
end
|
|
73
79
|
|
|
74
80
|
def handle_enter_bracket(_text, state)
|
|
@@ -80,14 +86,14 @@ module Obp
|
|
|
80
86
|
end
|
|
81
87
|
|
|
82
88
|
def handle_gender_qualifier(text, state)
|
|
83
|
-
state
|
|
89
|
+
add_gender(state, text.strip[0])
|
|
84
90
|
end
|
|
85
91
|
|
|
86
92
|
def handle_term_with_gender(text, state)
|
|
87
93
|
stripped = text.strip
|
|
88
94
|
if stripped =~ /\A(.+),\s*([mfn])\z/
|
|
89
|
-
state[:term_parts] <<
|
|
90
|
-
state
|
|
95
|
+
state[:term_parts] << Regexp.last_match(1).strip
|
|
96
|
+
add_gender(state, Regexp.last_match(2))
|
|
91
97
|
else
|
|
92
98
|
state[:term_parts] << stripped
|
|
93
99
|
end
|
|
@@ -105,6 +111,10 @@ module Obp
|
|
|
105
111
|
|
|
106
112
|
def handle_skip(_text, _state); end
|
|
107
113
|
|
|
114
|
+
def add_gender(state, code)
|
|
115
|
+
state[:genders] << GENDER_MAP.fetch(code)
|
|
116
|
+
end
|
|
117
|
+
|
|
108
118
|
def parse_segments(html)
|
|
109
119
|
segments = []
|
|
110
120
|
remaining = html.dup
|
data/lib/obp/access/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: obp-access
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-
|
|
11
|
+
date: 2026-06-13 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: nokogiri
|