tbx_importer 0.1.7 → 0.2.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b0a89cc90bfb0da2901149b99a39f968e22afb31
4
- data.tar.gz: 647d1288aa408be31688a10bc31c25fd22ecbfa8
3
+ metadata.gz: aa7e9edaa27e364b2f4735dc7b4e71253c57e0fc
4
+ data.tar.gz: 53c3f75de385653094bd1a9a71f821fe052487d9
5
5
  SHA512:
6
- metadata.gz: 15f48e14b96c3fcbcad3d572a6a22bc413132a7a1adbbfae34c8dfba302d60c6784258baa5291e753db623dfb4bbf43a08b05b474a1eee526bf088d0bc211311
7
- data.tar.gz: ed62cb32c367bafa33ae4f57eae48891d2b0512d8f9887cb2c8049d18aae8722aa5ddc1071b9531f6ea238a4be1d31432ad8bff415d5e856ac02bc7b1db90732
6
+ metadata.gz: a12d12838ee83be6442085cae9917b420d44790acea53a2074f9e429128ea0b8885121d3ba3feec181ab761ed62bbcb9d65c303357fe7876ab1649a41b64c45a
7
+ data.tar.gz: 39e3065d6d167cd9632558a8f45f04b53607a2b32e3c4ab1a008e9cc8ae0d7516f48348e343b64b68b722e5f44ee6b1741c7394283ed1df794b1557e440a9144
@@ -1,3 +1,3 @@
1
1
  module TbxImporter
2
- VERSION = "0.1.7"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/tbx_importer.rb CHANGED
@@ -26,7 +26,7 @@ module TbxImporter
26
26
  @doc = {
27
27
  source_language: "",
28
28
  tc: { id: "", counter: 0, vals: [], lang: "", definition: "" },
29
- term: { lang: "", counter: 0, vals: [], part_of_speech: "" },
29
+ term: { lang: "", counter: 0, vals: [], part_of_speech: "", term: "" },
30
30
  language_pairs: [],
31
31
  term_entry: false
32
32
  }
@@ -94,17 +94,19 @@ module TbxImporter
94
94
 
95
95
  def parse_file(reader)
96
96
  tag_stack = []
97
- generate_unique_id
98
97
  while reader.read do
99
- if !tag_stack.include?(reader.name)
100
- tag_stack.push(reader.name)
101
- eval_state(tag_stack, reader)
102
- elsif tag_stack.last == reader.name
103
- if tag_stack.pop.bytes.to_a == [116, 101, 114, 109, 69, 110, 116, 114, 121]
104
- generate_unique_id
98
+ if reader.node_type.to_i.eql?(1) && reader.read_string.nil?
99
+ tag_stack.pop
100
+ else
101
+ if !tag_stack.include?(reader.name)
102
+ tag_stack.push(reader.name)
103
+ eval_state(tag_stack, reader)
104
+ elsif tag_stack.last == reader.name
105
+ tag_stack.pop
105
106
  end
106
107
  end
107
108
  end
109
+ @doc[:tc][:vals].pop if @doc[:tc][:vals].last[0] != @doc[:term][:vals].last[0]
108
110
  reader.close
109
111
  end
110
112
 
@@ -114,6 +116,7 @@ module TbxImporter
114
116
  @doc[:lang] = reader.get_attribute("lang") || reader.get_attribute("xml:lang")
115
117
  @doc[:language_pairs] << @doc[:lang]
116
118
  when [116, 101, 114, 109, 69, 110, 116, 114, 121] #termEntry
119
+ generate_unique_id
117
120
  write_tc
118
121
  when [108, 97, 110, 103, 83, 101, 116] #langSet
119
122
  @doc[:term][:lang] = reader.get_attribute("lang") || reader.get_attribute("xml:lang")
@@ -122,9 +125,11 @@ module TbxImporter
122
125
  write_term(reader)
123
126
  when [116, 101, 114, 109, 78, 111, 116, 101] #termNote
124
127
  unless reader.read_string.nil?
125
- @doc[:term][:part_of_speech] = PrettyStrings::Cleaner.new(reader.read_string.downcase).pretty.gsub("\\","&#92;").gsub("'",%q(\\\')) if reader.get_attribute("type").eql?("partOfSpeech")
126
- @doc[:term][:vals].pop
127
- write_term(reader)
128
+ if reader.get_attribute("type").eql?("partOfSpeech")
129
+ @doc[:term][:part_of_speech] = PrettyStrings::Cleaner.new(reader.read_string.downcase).pretty.gsub("\\","&#92;").gsub("'",%q(\\\'))
130
+ @doc[:term][:vals].pop
131
+ write_term_pos
132
+ end
128
133
  end
129
134
  when [100, 101, 115, 99, 114, 105, 112] #descrip
130
135
  @doc[:tc][:definition] = PrettyStrings::Cleaner.new(reader.read_string).pretty.gsub("\\","&#92;").gsub("'",%q(\\\')) if reader.get_attribute("type").eql?("definition")
@@ -140,9 +145,13 @@ module TbxImporter
140
145
 
141
146
  def write_term(reader)
142
147
  return if reader.read_string.nil?
143
- text = PrettyStrings::Cleaner.new(reader.read_string).pretty.gsub("\\","&#92;").gsub("'",%q(\\\'))
144
- word_count = text.gsub("\s+", ' ').split(' ').length
145
- @doc[:term][:vals] << [@doc[:tc][:id], @doc[:term][:lang], @doc[:term][:part_of_speech], text]
148
+ @doc[:term][:term] = PrettyStrings::Cleaner.new(reader.read_string).pretty.gsub("\\","&#92;").gsub("'",%q(\\\'))
149
+ word_count = @doc[:term][:term].gsub("\s+", ' ').split(' ').length
150
+ @doc[:term][:vals] << [@doc[:tc][:id], @doc[:term][:lang], @doc[:term][:part_of_speech], @doc[:term][:term]]
151
+ end
152
+
153
+ def write_term_pos
154
+ @doc[:term][:vals] << [@doc[:tc][:id], @doc[:term][:lang], @doc[:term][:part_of_speech], @doc[:term][:term]]
146
155
  end
147
156
 
148
157
  def generate_unique_id
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tbx_importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-16 00:00:00.000000000 Z
11
+ date: 2016-06-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler