sdltm_importer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7718a812dafcc0bcd536981d537d0e2399772d3f
4
+ data.tar.gz: 3c794c070302a2f8584a0f8f057b204e73eccec6
5
+ SHA512:
6
+ metadata.gz: d3d8d68a8af176e447094c6dbe274fa5fd97226cd17672886f5167248f942bdcf18bf98b225a4b29b945e0fae294cc338c97d9b3549b019e294744b0fd0d8836
7
+ data.tar.gz: 30c885b5125333beff959f419bc4db15a33a88eaa80c57eaad39c1c66b4dd2faef76997e40de28c0327f742e4daf14dcc02766e1af6aa04a3d43d8eb98d1ed9a
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.4
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in sdltm_importer.gemspec
4
+ gemspec
data/README.md ADDED
@@ -0,0 +1,70 @@
1
+ # SDLTM Importer
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/sdltm_importer.svg)](https://badge.fury.io/rb/sdltm_importer) [![Build Status](https://travis-ci.org/diasks2/sdltm_importer.png)](https://travis-ci.org/diasks2/sdltm_importer) [![License](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat)](https://github.com/diasks2/sdltm_importer/blob/master/LICENSE.txt)
4
+
5
+ This gem handles the importing and parsing of .sdltm translation memory files.
6
+
7
+ ## Installation
8
+
9
+ Install the gem directly, or add it to your application's Gemfile:
10
+
11
+ **Ruby**
12
+ ```
13
+ gem install sdltm_importer
14
+ ```
15
+
16
+ **Ruby on Rails**
17
+ Add this line to your application’s Gemfile:
18
+ ```ruby
19
+ gem 'sdltm_importer'
20
+ ```
21
+
22
+ ## Usage
23
+
24
+ ```ruby
25
+ # Get the high level stats of a .sdltm file
26
+ file_path = File.expand_path('../sample.sdltm')
27
+ sdltm = SdltmImporter::Sdltm.new(file_path: file_path)
28
+ sdltm.stats
29
+ # => {:tu_count=>1, :seg_count=>2, :language_pairs=>[["en", "fr"]]}
30
+
31
+ # Extract the segments of a .sdltm file
32
+ # Result: [translation_units, segments]
33
+ # translation_units = [tu_id, creation_date]
34
+ # segments = [tu_id, segment_role, word_count, language, segment_text, creation_date]
35
+
36
+ sdltm.import
37
+ # => [[["6234-1457917153-1"]], [["6234-1457917153-1", "source", 2, "en", "Hello world"], ["6234-1457917153-1", "target", 3, "fr", "Bonjour le monde"]]]
38
+ ```
39
+
40
+ ## Contributing
41
+
42
+ 1. Fork it ( https://github.com/diasks2/sdltm_importer/fork )
43
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
44
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
45
+ 4. Push to the branch (`git push origin my-new-feature`)
46
+ 5. Create a new Pull Request
47
+
48
+ ## License
49
+
50
+ The MIT License (MIT)
51
+
52
+ Copyright (c) 2016 Kevin S. Dias
53
+
54
+ Permission is hereby granted, free of charge, to any person obtaining a copy
55
+ of this software and associated documentation files (the "Software"), to deal
56
+ in the Software without restriction, including without limitation the rights
57
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
58
+ copies of the Software, and to permit persons to whom the Software is
59
+ furnished to do so, subject to the following conditions:
60
+
61
+ The above copyright notice and this permission notice shall be included in
62
+ all copies or substantial portions of the Software.
63
+
64
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
65
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
66
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
67
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
68
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
69
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
70
+ THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,5 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+ task default: :spec
@@ -0,0 +1,120 @@
1
+ require 'sdltm_importer/version'
2
+ require 'sqlite3'
3
+ require 'open-uri'
4
+ require 'pretty_strings'
5
+
6
+ module SdltmImporter
7
# A single inline tag found in a segment, paired with the text that follows it.
class Tag
  # Captures the digits between <TagID> and </TagID>.
  TAG_REGEX = /<TagID>(\d*)<\/TagID>/

  attr_accessor :id, :content

  # tag         - indexable value whose first element is the tag markup string.
  # tag_content - stored untouched as the tag's content.
  def initialize(tag, tag_content)
    @content = tag_content
    @id = parse_tag_id(tag)
  end

  # Returns the capture group of the first TagID match found in tags[0]
  # (a one-element array such as ["12"]), or nil when nothing matches.
  def parse_tag_id(tags)
    markup = tags[0]
    matches = markup.scan(TAG_REGEX)
    matches.first
  end
end
19
+
20
# Imports a .sdltm translation memory file. The file is opened as a SQLite
# database and every row of its `translation_units` table is turned into one
# translation unit plus a source and a target segment.
class Sdltm
  TUV_TRANSLATION_REGEX = /<Elements><Text><Value>(.*)<\/Value><\/Text><\/Elements>/
  TUV_LANG_REGEX = /<CultureName>(.*)<\/CultureName>/
  TUV_TAGS_REGEX = /<Tag>(.*?)<\/Tag>/
  TUV_CONTENT_REGEX = /<\/Tag><Text><Value>(.*?)<\/Value><\/Text><Tag>/

  # Positions, within a `translation_units` row, of the values read here.
  SOURCE_COLUMN = 4
  TARGET_COLUMN = 6
  CREATION_DATE_COLUMN = 7

  attr_reader :file_path

  # file_path - location of the .sdltm file to read.
  def initialize(file_path:)
    @file_path = file_path
    @doc = {
      source_language: "",
      target_language: "",
      tu: { id: "", counter: 0, vals: [], creation_date: "" },
      seg: { lang: "", counter: 0, vals: [], role: "" },
      language_pairs: []
    }
  end

  # Returns a hash with the number of translation units (:tu_count), the
  # number of segments (:seg_count) and the distinct [source, target]
  # language pairs (:language_pairs).
  def stats
    imported_data
    { tu_count: @doc[:tu][:vals].length, seg_count: @doc[:seg][:vals].length, language_pairs: @doc[:language_pairs] }
  end

  # Returns [translation_units, segments] where
  #   translation_units = [[tu_id, creation_date], ...]
  #   segments = [[tu_id, role, word_count, language, text, creation_date], ...]
  def import
    imported_data
    [@doc[:tu][:vals], @doc[:seg][:vals]]
  end

  private

  # Runs the import at most once; later calls reuse the populated @doc.
  def imported_data
    @imported_data ||= import_data
  end

  # Reads every row of `translation_units` and records it in @doc.
  # Returns the raw rows (always truthy, so the memoization above holds).
  def import_data
    # NOTE(review): Kernel#open accepts more than plain paths on some Ruby
    # versions (e.g. a leading "|" runs a command); only pass trusted paths.
    handle = open(file_path)
    db = SQLite3::Database.new(handle.path)
    rows = db.execute "Select * FROM translation_units"
    rows.each { |row| import_translation_unit(row) }
  ensure
    # Release the database connection and the file handle even on failure.
    db.close if db
    handle.close if handle && !handle.closed?
  end

  # Records one translation unit and its source and target segments.
  def import_translation_unit(row)
    tu = @doc[:tu]
    tu[:counter] += 1
    # Generated id: four random digits, the current epoch second, a counter.
    tu[:id] = [Array.new(4) { rand(10) }.join, Time.now.to_i, tu[:counter]].join('-')
    tu[:creation_date] = iso_timestamp(row[CREATION_DATE_COLUMN])
    tu[:vals] << [tu[:id], tu[:creation_date]]
    [SOURCE_COLUMN, TARGET_COLUMN].each { |column| import_segment(row, column) }
  end

  # Records the segment stored in the given column of the row.
  def import_segment(row, column)
    language = row[column].scan(TUV_LANG_REGEX).flatten[0]
    tags = create_tags(row[column].scan(TUV_TAGS_REGEX), row, column)
    segment_text = PrettyStrings::Cleaner.new(parse_segment_text(row, tags, column)).pretty

    if column == SOURCE_COLUMN
      @doc[:source_language] = language
      @doc[:seg][:role] = 'source'
    else
      @doc[:target_language] = language
      @doc[:seg][:role] = 'target'
      pair = [@doc[:source_language], @doc[:target_language]]
      @doc[:language_pairs] << pair unless @doc[:language_pairs].include?(pair)
    end

    @doc[:seg][:lang] = language
    @doc[:seg][:vals] << [@doc[:tu][:id], @doc[:seg][:role], count_words(segment_text), language, segment_text, @doc[:tu][:creation_date]]
  end

  # Counts whitespace-separated tokens. The previous code called
  # gsub("\s+", ' '), a string pattern that matched the literal text " +"
  # and therefore dropped a standalone "+" from the count.
  def count_words(text)
    text.split(' ').length
  end

  # Converts "YYYY-MM-DD HH:MM:SS" into "YYYYMMDDTHHMMSSZ".
  def iso_timestamp(timestamp)
    "#{timestamp.delete('-').delete(':').sub(' ', 'T')}Z"
  end

  # Returns the text of the segment stored at segment[i]. Without tags the
  # text comes from TUV_TRANSLATION_REGEX; with tags, the content pieces of
  # the tags are joined with single spaces.
  def parse_segment_text(segment, combined_tags, i)
    if combined_tags.nil? || combined_tags.empty?
      return segment[i].scan(TUV_TRANSLATION_REGEX).flatten[0]
    end

    text = ''
    combined_tags.each_with_index do |tag, index|
      next if tag.content.nil? || tag.content.empty?
      text = index.zero? ? tag.content[0] : text + ' ' + tag.content[0]
    end
    text
  end

  # Builds Tag objects from every other match in tags (even indexes), each
  # paired positionally with a TUV_CONTENT_REGEX match from segment[i].
  # Returns nil when tags is empty.
  def create_tags(tags, segment, i)
    return if tags.empty?

    kept_tags = tags.each_slice(2).map(&:first)
    contents = segment[i].scan(TUV_CONTENT_REGEX)
    kept_tags.zip(contents).map { |tag, content| Tag.new(tag, content) }
  end
end
120
+ end
@@ -0,0 +1,3 @@
1
# Namespace of the sdltm_importer gem.
module SdltmImporter
  # Version string of the gem release.
  VERSION = '0.1.0'
end
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'sdltm_importer/version'
5
+
6
# Gem specification for sdltm_importer.
Gem::Specification.new do |spec|
  spec.name          = "sdltm_importer"
  spec.version       = SdltmImporter::VERSION
  spec.authors       = ["Kevin S. Dias"]
  spec.email         = ["diasks2@gmail.com"]

  spec.summary       = %q{SDLTM file importer}
  spec.description   = %q{Import the content of a .sdltm translation memory file}
  spec.homepage      = "https://github.com/diasks2/sdltm_importer"
  # The README ships the MIT license text; declare it so the published gem
  # metadata no longer reports an empty license list.
  spec.license       = "MIT"

  # Package every file tracked by git.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.9"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"
  spec.add_runtime_dependency "pretty_strings", "~> 0.5.0"
  # NOTE(review): an exact pin on a runtime dependency forces this version on
  # every consumer; consider a pessimistic constraint if newer releases work.
  spec.add_runtime_dependency "sqlite3", "1.3.10"
end
Binary file
@@ -0,0 +1,65 @@
1
+ require 'spec_helper'
2
+
3
describe SdltmImporter do
  # Resolves a fixture relative to this spec file, so the suite does not
  # depend on the directory rspec is launched from.
  def fixture_path(name)
    File.expand_path("../sample_test_files/#{name}", __FILE__)
  end

  let(:sample)   { SdltmImporter::Sdltm.new(file_path: fixture_path('sample.sdltm')) }
  let(:sample_2) { SdltmImporter::Sdltm.new(file_path: fixture_path('sample_2.sdltm')) }

  it 'has a version number' do
    expect(SdltmImporter::VERSION).not_to be nil
  end

  describe '#stats' do
    it 'reports the stats of a .sdltm file' do
      expect(sample.stats).to eq({:tu_count=>537, :seg_count=>1074, :language_pairs=>[["fr-FR", "en-US"]]})
    end

    it 'reports the stats of a .sdltm file 2' do
      expect(sample_2.stats).to eq({:tu_count=>102, :seg_count=>204, :language_pairs=>[["en-US", "de-DE"]]})
    end
  end

  describe '#import' do
    it 'imports a .sdltm file 1' do
      expect(sample.import[0].length).to eq(537)
    end

    it 'imports a .sdltm file 2' do
      expect(sample.import[1].length).to eq(1074)
    end

    it 'imports a .sdltm file 3' do
      expect(sample.import[1][-1][4]).to eq("Your website's URL")
    end

    it 'imports a .sdltm file 4' do
      expect(sample_2.import[0].length).to eq(102)
    end

    it 'imports a .sdltm file 5' do
      expect(sample_2.import[1].length).to eq(204)
    end

    # The last translation unit and the last segment share the same id.
    it 'imports a .sdltm file 6' do
      expect(sample_2.import[0][-1][0]).to eq(sample_2.import[1][-1][0])
    end

    # Each unit yields two segments, so unit 1 owns segments 2 and 3.
    # Renamed from a second "file 6" so example descriptions are unique.
    it 'imports a .sdltm file 7' do
      expect(sample_2.import[0][1][0]).to eq(sample_2.import[1][3][0])
    end
  end
end
@@ -0,0 +1,2 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+ require 'sdltm_importer'
metadata ADDED
@@ -0,0 +1,130 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sdltm_importer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Kevin S. Dias
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-03-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.9'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pretty_strings
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.5.0
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.5.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: sqlite3
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '='
74
+ - !ruby/object:Gem::Version
75
+ version: 1.3.10
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '='
81
+ - !ruby/object:Gem::Version
82
+ version: 1.3.10
83
+ description: Import the content of a .sdltm translation memory file
84
+ email:
85
+ - diasks2@gmail.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - ".rspec"
92
+ - ".travis.yml"
93
+ - Gemfile
94
+ - README.md
95
+ - Rakefile
96
+ - lib/sdltm_importer.rb
97
+ - lib/sdltm_importer/version.rb
98
+ - sdltm_importer.gemspec
99
+ - spec/sample_test_files/sample.sdltm
100
+ - spec/sample_test_files/sample_2.sdltm
101
+ - spec/sdltm_importer_spec.rb
102
+ - spec/spec_helper.rb
103
+ homepage: https://github.com/diasks2/sdltm_importer
104
+ licenses: []
105
+ metadata: {}
106
+ post_install_message:
107
+ rdoc_options: []
108
+ require_paths:
109
+ - lib
110
+ required_ruby_version: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - ">="
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ required_rubygems_version: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - ">="
118
+ - !ruby/object:Gem::Version
119
+ version: '0'
120
+ requirements: []
121
+ rubyforge_project:
122
+ rubygems_version: 2.4.1
123
+ signing_key:
124
+ specification_version: 4
125
+ summary: SDLTM file importer
126
+ test_files:
127
+ - spec/sample_test_files/sample.sdltm
128
+ - spec/sample_test_files/sample_2.sdltm
129
+ - spec/sdltm_importer_spec.rb
130
+ - spec/spec_helper.rb