tmx_importer 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 99a80fae6e289623de257e8383f5ba8e6d90eda5
4
- data.tar.gz: 150248c983d19427e7cf3ccbfc98c4a63227436f
3
+ metadata.gz: 2d9609a8256bf614eed18bcd0482e7cc72e75f68
4
+ data.tar.gz: ed72d1341c9daf13a7bba504457b25be16d2a93a
5
5
  SHA512:
6
- metadata.gz: 9b869fc95d11ba967d43ef4f54914d557d10bbaa57c44831ed86e4fe36787159262a27b6c2dbcdec3902dc4936a1e7e72a6327f10d30a2576a45487dc5e7432d
7
- data.tar.gz: 0c11e503f9447569d90aaf49199691fd7bb54bd9d24b0a4371d8242259b9353183da08dbe6c751fa10d36c9488b13aa8f2b1c0886c0a06f1cf62f703eef1333d
6
+ metadata.gz: 59eb55bd99594f64f64b4ab9beb1e3990a85082cb5c0d7123ee6168013b5b1a73313eb5c95e222df7cb50f01b5df2111266e825ea99e94a71b14a4e87b31be56
7
+ data.tar.gz: d4a284564130928be00a06597c523033fd10f7069c9235072aeacad409fd720f2f38ee9d49fedcff408598b520efbfe0793311c6a1fbf8c21b9137c77a0c5427
data/.travis.yml CHANGED
@@ -1,4 +1,3 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 2.1.2
4
3
  - 2.2.4
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/tmx_importer.svg)](https://badge.fury.io/rb/tmx_importer) [![Build Status](https://travis-ci.org/diasks2/tmx_importer.png)](https://travis-ci.org/diasks2/tmx_importer) [![License](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat)](https://github.com/diasks2/tmx_importer/blob/master/LICENSE.txt)
4
4
 
5
- This gem handles the importing and parsing of [.tmx translation memory files](http://www.ttt.org/oscarstandards/tmx/tmx14-20020710.htm). TMX files are xml files.
5
+ This gem handles the importing and parsing of [.tmx translation memory files](http://www.ttt.org/oscarstandards/tmx/tmx14-20020710.htm). [TMX files](https://en.wikipedia.org/wiki/Translation_Memory_eXchange) are xml files.
6
6
 
7
7
  ## Installation
8
8
 
@@ -30,11 +30,11 @@ TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8').stats
30
30
  # Extract the segments of a TMX file
31
31
  # Result: [translation_units, segments]
32
32
  # translation_units = [tu_id, creation_date]
33
- # segments = [tu_id, segment_role, word_count, language, segment_text]
33
+ # segments = [tu_id, segment_role, word_count, language, segment_text, creation_date]
34
34
 
35
35
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-8).tmx')
36
36
  TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8').import
37
- # => [[["5533-1457670156-1", "2016-03-11T13:22:36+09:00"], ["6836-1457670156-3", "2016-03-11T13:22:36+09:00"], ["3285-1457670156-5", "2016-03-11T13:22:36+09:00"], ["6706-1457670156-7", "2016-03-11T13:22:36+09:00"]], [["5533-1457670156-1", "", 1, "de-DE", "überprüfen"], ["5533-1457670156-1", "target", 1, "en-US", "check"], ["6836-1457670156-3", "source", 1, "de-DE", "Rückenlehneneinstellung"], ["6836-1457670156-3", "target", 2, "en-US", "Backrest adjustment"], ["3285-1457670156-5", "source", 1, "de-DE", "Bezüglich"], ["3285-1457670156-5", "target", 3, "en-US", "In terms of"], ["6706-1457670156-7", "source", 20, "de-DE", "Der Staatsschutz prüft, ob es einen Zusammenhang mit einem Anschlag auf eine geplante Flüchtlingsunterkunft in der Nachbarschaft Ende August gibt."], ["6706-1457670156-7", "target", 23, "en-US", "The state protection checks whether there is a connection with an attack on a planned refugee camp in the neighborhood of late August."]]]
37
+ # => [[["3638-1457683912-1", "2016-03-11T17:11:52+09:00"], ["7214-1457683912-3", "2016-03-11T17:11:52+09:00"], ["1539-1457683912-5", "2016-03-11T17:11:52+09:00"], ["6894-1457683912-7", "2016-03-11T17:11:52+09:00"]], [["3638-1457683912-1", "", 1, "de-DE", "überprüfen", "2016-03-11T17:11:52+09:00"], ["3638-1457683912-1", "target", 1, "en-US", "check", "2016-03-11T17:11:52+09:00"], ["7214-1457683912-3", "source", 1, "de-DE", "Rückenlehneneinstellung", "2016-03-11T17:11:52+09:00"], ["7214-1457683912-3", "target", 2, "en-US", "Backrest adjustment", "2016-03-11T17:11:52+09:00"], ["1539-1457683912-5", "source", 1, "de-DE", "Bezüglich", "2016-03-11T17:11:52+09:00"], ["1539-1457683912-5", "target", 3, "en-US", "In terms of", "2016-03-11T17:11:52+09:00"], ["6894-1457683912-7", "source", 20, "de-DE", "Der Staatsschutz prüft, ob es einen Zusammenhang mit einem Anschlag auf eine geplante Flüchtlingsunterkunft in der Nachbarschaft Ende August gibt.", "2016-03-11T17:11:52+09:00"], ["6894-1457683912-7", "target", 23, "en-US", "The state protection checks whether there is a connection with an attack on a planned refugee camp in the neighborhood of late August.", "2016-03-11T17:11:52+09:00"]]]
38
38
  ```
39
39
 
40
40
  ## Contributing
data/lib/tmx_importer/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module TmxImporter
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/tmx_importer.rb CHANGED
@@ -14,7 +14,7 @@ module TmxImporter
14
14
  @encoding = encoding.upcase
15
15
  @doc = {
16
16
  source_language: "",
17
- tu: { id: "", counter: 0, vals: [], lang: "" },
17
+ tu: { id: "", counter: 0, vals: [], lang: "", creation_date: "" },
18
18
  seg: { lang: "", counter: 0, vals: [], role: "" },
19
19
  language_pairs: []
20
20
  }
@@ -117,15 +117,15 @@ module TmxImporter
117
117
 
118
118
  def write_tu(reader)
119
119
  @doc[:tu][:lang] = reader.get_attribute("srclang")
120
- created_date = reader.get_attribute("creationdate").nil? ? DateTime.now.to_s : DateTime.parse(reader.get_attribute("creationdate")).to_s
121
- @doc[:tu][:vals] << [@doc[:tu][:id], created_date]
120
+ @doc[:tu][:creation_date] = reader.get_attribute("creationdate").nil? ? DateTime.now.to_s : DateTime.parse(reader.get_attribute("creationdate")).to_s
121
+ @doc[:tu][:vals] << [@doc[:tu][:id], @doc[:tu][:creation_date]]
122
122
  end
123
123
 
124
124
  def write_seg(reader)
125
125
  return if reader.read_string.empty?
126
126
  text = PrettyStrings::Cleaner.new(reader.read_string.force_encoding('UTF-8')).pretty.gsub("\\","&#92;").gsub("'",%q(\\\'))
127
127
  word_count = text.gsub("\s+", ' ').split(' ').length
128
- @doc[:seg][:vals] << [@doc[:tu][:id], @doc[:seg][:role], word_count, @doc[:seg][:lang], text]
128
+ @doc[:seg][:vals] << [@doc[:tu][:id], @doc[:seg][:role], word_count, @doc[:seg][:lang], text, @doc[:tu][:creation_date]]
129
129
  end
130
130
 
131
131
  def generate_unique_id
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tmx_importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias