tbx_importer 0.1.7 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b0a89cc90bfb0da2901149b99a39f968e22afb31
4
- data.tar.gz: 647d1288aa408be31688a10bc31c25fd22ecbfa8
3
+ metadata.gz: aa7e9edaa27e364b2f4735dc7b4e71253c57e0fc
4
+ data.tar.gz: 53c3f75de385653094bd1a9a71f821fe052487d9
5
5
  SHA512:
6
- metadata.gz: 15f48e14b96c3fcbcad3d572a6a22bc413132a7a1adbbfae34c8dfba302d60c6784258baa5291e753db623dfb4bbf43a08b05b474a1eee526bf088d0bc211311
7
- data.tar.gz: ed62cb32c367bafa33ae4f57eae48891d2b0512d8f9887cb2c8049d18aae8722aa5ddc1071b9531f6ea238a4be1d31432ad8bff415d5e856ac02bc7b1db90732
6
+ metadata.gz: a12d12838ee83be6442085cae9917b420d44790acea53a2074f9e429128ea0b8885121d3ba3feec181ab761ed62bbcb9d65c303357fe7876ab1649a41b64c45a
7
+ data.tar.gz: 39e3065d6d167cd9632558a8f45f04b53607a2b32e3c4ab1a008e9cc8ae0d7516f48348e343b64b68b722e5f44ee6b1741c7394283ed1df794b1557e440a9144
@@ -1,3 +1,3 @@
1
1
  module TbxImporter
2
- VERSION = "0.1.7"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/tbx_importer.rb CHANGED
@@ -26,7 +26,7 @@ module TbxImporter
26
26
  @doc = {
27
27
  source_language: "",
28
28
  tc: { id: "", counter: 0, vals: [], lang: "", definition: "" },
29
- term: { lang: "", counter: 0, vals: [], part_of_speech: "" },
29
+ term: { lang: "", counter: 0, vals: [], part_of_speech: "", term: "" },
30
30
  language_pairs: [],
31
31
  term_entry: false
32
32
  }
@@ -94,17 +94,19 @@ module TbxImporter
94
94
 
95
95
  def parse_file(reader)
96
96
  tag_stack = []
97
- generate_unique_id
98
97
  while reader.read do
99
- if !tag_stack.include?(reader.name)
100
- tag_stack.push(reader.name)
101
- eval_state(tag_stack, reader)
102
- elsif tag_stack.last == reader.name
103
- if tag_stack.pop.bytes.to_a == [116, 101, 114, 109, 69, 110, 116, 114, 121]
104
- generate_unique_id
98
+ if reader.node_type.to_i.eql?(1) && reader.read_string.nil?
99
+ tag_stack.pop
100
+ else
101
+ if !tag_stack.include?(reader.name)
102
+ tag_stack.push(reader.name)
103
+ eval_state(tag_stack, reader)
104
+ elsif tag_stack.last == reader.name
105
+ tag_stack.pop
105
106
  end
106
107
  end
107
108
  end
109
+ @doc[:tc][:vals].pop if @doc[:tc][:vals].last[0] != @doc[:term][:vals].last[0]
108
110
  reader.close
109
111
  end
110
112
 
@@ -114,6 +116,7 @@ module TbxImporter
114
116
  @doc[:lang] = reader.get_attribute("lang") || reader.get_attribute("xml:lang")
115
117
  @doc[:language_pairs] << @doc[:lang]
116
118
  when [116, 101, 114, 109, 69, 110, 116, 114, 121] #termEntry
119
+ generate_unique_id
117
120
  write_tc
118
121
  when [108, 97, 110, 103, 83, 101, 116] #langSet
119
122
  @doc[:term][:lang] = reader.get_attribute("lang") || reader.get_attribute("xml:lang")
@@ -122,9 +125,11 @@ module TbxImporter
122
125
  write_term(reader)
123
126
  when [116, 101, 114, 109, 78, 111, 116, 101] #termNote
124
127
  unless reader.read_string.nil?
125
- @doc[:term][:part_of_speech] = PrettyStrings::Cleaner.new(reader.read_string.downcase).pretty.gsub("\\","&#92;").gsub("'",%q(\\\')) if reader.get_attribute("type").eql?("partOfSpeech")
126
- @doc[:term][:vals].pop
127
- write_term(reader)
128
+ if reader.get_attribute("type").eql?("partOfSpeech")
129
+ @doc[:term][:part_of_speech] = PrettyStrings::Cleaner.new(reader.read_string.downcase).pretty.gsub("\\","&#92;").gsub("'",%q(\\\'))
130
+ @doc[:term][:vals].pop
131
+ write_term_pos
132
+ end
128
133
  end
129
134
  when [100, 101, 115, 99, 114, 105, 112] #descrip
130
135
  @doc[:tc][:definition] = PrettyStrings::Cleaner.new(reader.read_string).pretty.gsub("\\","&#92;").gsub("'",%q(\\\')) if reader.get_attribute("type").eql?("definition")
@@ -140,9 +145,13 @@ module TbxImporter
140
145
 
141
146
  def write_term(reader)
142
147
  return if reader.read_string.nil?
143
- text = PrettyStrings::Cleaner.new(reader.read_string).pretty.gsub("\\","&#92;").gsub("'",%q(\\\'))
144
- word_count = text.gsub("\s+", ' ').split(' ').length
145
- @doc[:term][:vals] << [@doc[:tc][:id], @doc[:term][:lang], @doc[:term][:part_of_speech], text]
148
+ @doc[:term][:term] = PrettyStrings::Cleaner.new(reader.read_string).pretty.gsub("\\","&#92;").gsub("'",%q(\\\'))
149
+ word_count = @doc[:term][:term].gsub("\s+", ' ').split(' ').length
150
+ @doc[:term][:vals] << [@doc[:tc][:id], @doc[:term][:lang], @doc[:term][:part_of_speech], @doc[:term][:term]]
151
+ end
152
+
153
+ def write_term_pos
154
+ @doc[:term][:vals] << [@doc[:tc][:id], @doc[:term][:lang], @doc[:term][:part_of_speech], @doc[:term][:term]]
146
155
  end
147
156
 
148
157
  def generate_unique_id
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tbx_importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-16 00:00:00.000000000 Z
11
+ date: 2016-06-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler