tbx_importer 0.1.7 → 0.2.0
This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tbx_importer/version.rb +1 -1
- data/lib/tbx_importer.rb +23 -14
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aa7e9edaa27e364b2f4735dc7b4e71253c57e0fc
|
4
|
+
data.tar.gz: 53c3f75de385653094bd1a9a71f821fe052487d9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a12d12838ee83be6442085cae9917b420d44790acea53a2074f9e429128ea0b8885121d3ba3feec181ab761ed62bbcb9d65c303357fe7876ab1649a41b64c45a
|
7
|
+
data.tar.gz: 39e3065d6d167cd9632558a8f45f04b53607a2b32e3c4ab1a008e9cc8ae0d7516f48348e343b64b68b722e5f44ee6b1741c7394283ed1df794b1557e440a9144
|
data/lib/tbx_importer/version.rb
CHANGED
data/lib/tbx_importer.rb
CHANGED
@@ -26,7 +26,7 @@ module TbxImporter
|
|
26
26
|
@doc = {
|
27
27
|
source_language: "",
|
28
28
|
tc: { id: "", counter: 0, vals: [], lang: "", definition: "" },
|
29
|
-
term: { lang: "", counter: 0, vals: [], part_of_speech: "" },
|
29
|
+
term: { lang: "", counter: 0, vals: [], part_of_speech: "", term: "" },
|
30
30
|
language_pairs: [],
|
31
31
|
term_entry: false
|
32
32
|
}
|
@@ -94,17 +94,19 @@ module TbxImporter
|
|
94
94
|
|
95
95
|
def parse_file(reader)
|
96
96
|
tag_stack = []
|
97
|
-
generate_unique_id
|
98
97
|
while reader.read do
|
99
|
-
if
|
100
|
-
tag_stack.
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
98
|
+
if reader.node_type.to_i.eql?(1) && reader.read_string.nil?
|
99
|
+
tag_stack.pop
|
100
|
+
else
|
101
|
+
if !tag_stack.include?(reader.name)
|
102
|
+
tag_stack.push(reader.name)
|
103
|
+
eval_state(tag_stack, reader)
|
104
|
+
elsif tag_stack.last == reader.name
|
105
|
+
tag_stack.pop
|
105
106
|
end
|
106
107
|
end
|
107
108
|
end
|
109
|
+
@doc[:tc][:vals].pop if @doc[:tc][:vals].last[0] != @doc[:term][:vals].last[0]
|
108
110
|
reader.close
|
109
111
|
end
|
110
112
|
|
@@ -114,6 +116,7 @@ module TbxImporter
|
|
114
116
|
@doc[:lang] = reader.get_attribute("lang") || reader.get_attribute("xml:lang")
|
115
117
|
@doc[:language_pairs] << @doc[:lang]
|
116
118
|
when [116, 101, 114, 109, 69, 110, 116, 114, 121] #termEntry
|
119
|
+
generate_unique_id
|
117
120
|
write_tc
|
118
121
|
when [108, 97, 110, 103, 83, 101, 116] #langSet
|
119
122
|
@doc[:term][:lang] = reader.get_attribute("lang") || reader.get_attribute("xml:lang")
|
@@ -122,9 +125,11 @@ module TbxImporter
|
|
122
125
|
write_term(reader)
|
123
126
|
when [116, 101, 114, 109, 78, 111, 116, 101] #termNote
|
124
127
|
unless reader.read_string.nil?
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
+
if reader.get_attribute("type").eql?("partOfSpeech")
|
129
|
+
@doc[:term][:part_of_speech] = PrettyStrings::Cleaner.new(reader.read_string.downcase).pretty.gsub("\\","&#92;").gsub("'",%q(\\\'))
|
130
|
+
@doc[:term][:vals].pop
|
131
|
+
write_term_pos
|
132
|
+
end
|
128
133
|
end
|
129
134
|
when [100, 101, 115, 99, 114, 105, 112] #descrip
|
130
135
|
@doc[:tc][:definition] = PrettyStrings::Cleaner.new(reader.read_string).pretty.gsub("\\","&#92;").gsub("'",%q(\\\')) if reader.get_attribute("type").eql?("definition")
|
@@ -140,9 +145,13 @@ module TbxImporter
|
|
140
145
|
|
141
146
|
def write_term(reader)
|
142
147
|
return if reader.read_string.nil?
|
143
|
-
|
144
|
-
word_count =
|
145
|
-
@doc[:term][:vals] << [@doc[:tc][:id], @doc[:term][:lang], @doc[:term][:part_of_speech],
|
148
|
+
@doc[:term][:term] = PrettyStrings::Cleaner.new(reader.read_string).pretty.gsub("\\","&#92;").gsub("'",%q(\\\'))
|
149
|
+
word_count = @doc[:term][:term].gsub("\s+", ' ').split(' ').length
|
150
|
+
@doc[:term][:vals] << [@doc[:tc][:id], @doc[:term][:lang], @doc[:term][:part_of_speech], @doc[:term][:term]]
|
151
|
+
end
|
152
|
+
|
153
|
+
def write_term_pos
|
154
|
+
@doc[:term][:vals] << [@doc[:tc][:id], @doc[:term][:lang], @doc[:term][:part_of_speech], @doc[:term][:term]]
|
146
155
|
end
|
147
156
|
|
148
157
|
def generate_unique_id
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tbx_importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin S. Dias
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-06-
|
11
|
+
date: 2016-06-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|