tbx_importer 0.1.7 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/tbx_importer/version.rb +1 -1
- data/lib/tbx_importer.rb +23 -14
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aa7e9edaa27e364b2f4735dc7b4e71253c57e0fc
|
4
|
+
data.tar.gz: 53c3f75de385653094bd1a9a71f821fe052487d9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a12d12838ee83be6442085cae9917b420d44790acea53a2074f9e429128ea0b8885121d3ba3feec181ab761ed62bbcb9d65c303357fe7876ab1649a41b64c45a
|
7
|
+
data.tar.gz: 39e3065d6d167cd9632558a8f45f04b53607a2b32e3c4ab1a008e9cc8ae0d7516f48348e343b64b68b722e5f44ee6b1741c7394283ed1df794b1557e440a9144
|
data/lib/tbx_importer/version.rb
CHANGED
data/lib/tbx_importer.rb
CHANGED
@@ -26,7 +26,7 @@ module TbxImporter
|
|
26
26
|
@doc = {
|
27
27
|
source_language: "",
|
28
28
|
tc: { id: "", counter: 0, vals: [], lang: "", definition: "" },
|
29
|
-
term: { lang: "", counter: 0, vals: [], part_of_speech: "" },
|
29
|
+
term: { lang: "", counter: 0, vals: [], part_of_speech: "", term: "" },
|
30
30
|
language_pairs: [],
|
31
31
|
term_entry: false
|
32
32
|
}
|
@@ -94,17 +94,19 @@ module TbxImporter
|
|
94
94
|
|
95
95
|
def parse_file(reader)
|
96
96
|
tag_stack = []
|
97
|
-
generate_unique_id
|
98
97
|
while reader.read do
|
99
|
-
if
|
100
|
-
tag_stack.
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
98
|
+
if reader.node_type.to_i.eql?(1) && reader.read_string.nil?
|
99
|
+
tag_stack.pop
|
100
|
+
else
|
101
|
+
if !tag_stack.include?(reader.name)
|
102
|
+
tag_stack.push(reader.name)
|
103
|
+
eval_state(tag_stack, reader)
|
104
|
+
elsif tag_stack.last == reader.name
|
105
|
+
tag_stack.pop
|
105
106
|
end
|
106
107
|
end
|
107
108
|
end
|
109
|
+
@doc[:tc][:vals].pop if @doc[:tc][:vals].last[0] != @doc[:term][:vals].last[0]
|
108
110
|
reader.close
|
109
111
|
end
|
110
112
|
|
@@ -114,6 +116,7 @@ module TbxImporter
|
|
114
116
|
@doc[:lang] = reader.get_attribute("lang") || reader.get_attribute("xml:lang")
|
115
117
|
@doc[:language_pairs] << @doc[:lang]
|
116
118
|
when [116, 101, 114, 109, 69, 110, 116, 114, 121] #termEntry
|
119
|
+
generate_unique_id
|
117
120
|
write_tc
|
118
121
|
when [108, 97, 110, 103, 83, 101, 116] #langSet
|
119
122
|
@doc[:term][:lang] = reader.get_attribute("lang") || reader.get_attribute("xml:lang")
|
@@ -122,9 +125,11 @@ module TbxImporter
|
|
122
125
|
write_term(reader)
|
123
126
|
when [116, 101, 114, 109, 78, 111, 116, 101] #termNote
|
124
127
|
unless reader.read_string.nil?
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
+
if reader.get_attribute("type").eql?("partOfSpeech")
|
129
|
+
@doc[:term][:part_of_speech] = PrettyStrings::Cleaner.new(reader.read_string.downcase).pretty.gsub("\\","&#92;").gsub("'",%q(\\\'))
|
130
|
+
@doc[:term][:vals].pop
|
131
|
+
write_term_pos
|
132
|
+
end
|
128
133
|
end
|
129
134
|
when [100, 101, 115, 99, 114, 105, 112] #descrip
|
130
135
|
@doc[:tc][:definition] = PrettyStrings::Cleaner.new(reader.read_string).pretty.gsub("\\","&#92;").gsub("'",%q(\\\')) if reader.get_attribute("type").eql?("definition")
|
@@ -140,9 +145,13 @@ module TbxImporter
|
|
140
145
|
|
141
146
|
def write_term(reader)
|
142
147
|
return if reader.read_string.nil?
|
143
|
-
|
144
|
-
word_count =
|
145
|
-
@doc[:term][:vals] << [@doc[:tc][:id], @doc[:term][:lang], @doc[:term][:part_of_speech],
|
148
|
+
@doc[:term][:term] = PrettyStrings::Cleaner.new(reader.read_string).pretty.gsub("\\","&#92;").gsub("'",%q(\\\'))
|
149
|
+
word_count = @doc[:term][:term].gsub("\s+", ' ').split(' ').length
|
150
|
+
@doc[:term][:vals] << [@doc[:tc][:id], @doc[:term][:lang], @doc[:term][:part_of_speech], @doc[:term][:term]]
|
151
|
+
end
|
152
|
+
|
153
|
+
def write_term_pos
|
154
|
+
@doc[:term][:vals] << [@doc[:tc][:id], @doc[:term][:lang], @doc[:term][:part_of_speech], @doc[:term][:term]]
|
146
155
|
end
|
147
156
|
|
148
157
|
def generate_unique_id
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tbx_importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin S. Dias
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-06-
|
11
|
+
date: 2016-06-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|