sdltm_importer 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7718a812dafcc0bcd536981d537d0e2399772d3f
4
+ data.tar.gz: 3c794c070302a2f8584a0f8f057b204e73eccec6
5
+ SHA512:
6
+ metadata.gz: d3d8d68a8af176e447094c6dbe274fa5fd97226cd17672886f5167248f942bdcf18bf98b225a4b29b945e0fae294cc338c97d9b3549b019e294744b0fd0d8836
7
+ data.tar.gz: 30c885b5125333beff959f419bc4db15a33a88eaa80c57eaad39c1c66b4dd2faef76997e40de28c0327f742e4daf14dcc02766e1af6aa04a3d43d8eb98d1ed9a
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.4
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in sdltm_importer.gemspec
4
+ gemspec
data/README.md ADDED
@@ -0,0 +1,70 @@
1
+ # SDLTM Importer
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/sdltm_importer.svg)](https://badge.fury.io/rb/sdltm_importer) [![Build Status](https://travis-ci.org/diasks2/sdltm_importer.png)](https://travis-ci.org/diasks2/sdltm_importer) [![License](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat)](https://github.com/diasks2/sdltm_importer/blob/master/LICENSE.txt)
4
+
5
+ This gem handles the importing and parsing of .sdltm translation memory files.
6
+
7
+ ## Installation
8
+
9
+ Install the gem with one of the methods below:
10
+
11
+ **Ruby**
12
+ ```
13
+ gem install sdltm_importer
14
+ ```
15
+
16
+ **Ruby on Rails**
17
+ Add this line to your application’s Gemfile:
18
+ ```ruby
19
+ gem 'sdltm_importer'
20
+ ```
21
+
22
+ ## Usage
23
+
24
+ ```ruby
25
+ # Get the high level stats of a .sdltm file
26
+ file_path = File.expand_path('../sample.sdltm')
27
+ sdltm = SdltmImporter::Sdltm.new(file_path: file_path)
28
+ sdltm.stats
29
+ # => {:tu_count=>1, :seg_count=>2, :language_pairs=>[["en", "fr"]]}
30
+
31
+ # Extract the segments of a .sdltm file
32
+ # Result: [translation_units, segments]
33
+ # translation_units = [tu_id, creation_date]
34
+ # segments = [tu_id, segment_role, word_count, language, segment_text, creation_date]
35
+
36
+ sdltm.import
37
+ # => [[["6234-1457917153-1", "20160314T005913Z"]], [["6234-1457917153-1", "source", 2, "en", "Hello world", "20160314T005913Z"], ["6234-1457917153-1", "target", 3, "fr", "Bonjour le monde", "20160314T005913Z"]]]
38
+ ```
39
+
40
+ ## Contributing
41
+
42
+ 1. Fork it ( https://github.com/diasks2/sdltm_importer/fork )
43
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
44
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
45
+ 4. Push to the branch (`git push origin my-new-feature`)
46
+ 5. Create a new Pull Request
47
+
48
+ ## License
49
+
50
+ The MIT License (MIT)
51
+
52
+ Copyright (c) 2016 Kevin S. Dias
53
+
54
+ Permission is hereby granted, free of charge, to any person obtaining a copy
55
+ of this software and associated documentation files (the "Software"), to deal
56
+ in the Software without restriction, including without limitation the rights
57
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
58
+ copies of the Software, and to permit persons to whom the Software is
59
+ furnished to do so, subject to the following conditions:
60
+
61
+ The above copyright notice and this permission notice shall be included in
62
+ all copies or substantial portions of the Software.
63
+
64
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
65
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
66
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
67
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
68
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
69
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
70
+ THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,5 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+ task default: :spec
@@ -0,0 +1,120 @@
1
+ require 'sdltm_importer/version'
2
+ require 'sqlite3'
3
+ require 'open-uri'
4
+ require 'pretty_strings'
5
+
6
module SdltmImporter
  # Pairs the ID parsed out of one <Tag> element with the text that was
  # matched alongside it by Sdltm#create_tags.
  class Tag
    TAG_REGEX = /<TagID>(\d*)<\/TagID>/

    # id      - result of #parse_tag_id (a one-element Array such as ["12"],
    #           or nil when the tag carries no <TagID>).
    # content - whatever was passed as +tag_content+ (Sdltm passes the capture
    #           Array of TUV_CONTENT_REGEX, or nil when there is no match).
    attr_accessor :id, :content

    # @param tag [Array<String>] capture array whose first element is the
    #   inner XML of a <Tag> element
    # @param tag_content [Array<String>, nil] text associated with the tag
    def initialize(tag, tag_content)
      @id = parse_tag_id(tag)
      @content = tag_content
    end

    # Extracts the <TagID> value from the first element of +tags+.
    #
    # NOTE: the value is returned as the raw capture Array (e.g. ["12"]),
    # not as a String; this is kept as-is so existing callers of Tag#id
    # keep working.
    #
    # @return [Array<String>, nil]
    def parse_tag_id(tags)
      tags[0].scan(TAG_REGEX).first
    end
  end

  # Reads the translation_units table of a .sdltm (SQLite) translation memory
  # and exposes its translation units and segments.
  class Sdltm
    TUV_TRANSLATION_REGEX = /<Elements><Text><Value>(.*)<\/Value><\/Text><\/Elements>/
    TUV_LANG_REGEX = /<CultureName>(.*)<\/CultureName>/
    TUV_TAGS_REGEX = /<Tag>(.*?)<\/Tag>/
    TUV_CONTENT_REGEX = /<\/Tag><Text><Value>(.*?)<\/Value><\/Text><Tag>/

    # Paths matching this are fetched through open-uri; everything else is
    # treated as a local file.
    REMOTE_PATH_REGEX = %r{\A(?:https?|ftp)://}i

    # Positions, within a translation_units row, of the columns this importer
    # reads: the source segment XML, the target segment XML and the creation date.
    SOURCE_COLUMN = 4
    TARGET_COLUMN = 6
    CREATION_DATE_COLUMN = 7
    private_constant :REMOTE_PATH_REGEX, :SOURCE_COLUMN, :TARGET_COLUMN, :CREATION_DATE_COLUMN

    attr_reader :file_path

    # @param file_path [String] path (or http/https/ftp URL) of the .sdltm file
    def initialize(file_path:)
      @file_path = file_path
      @doc = {
        source_language: "",
        target_language: "",
        tu: { id: "", counter: 0, vals: [], creation_date: "" },
        seg: { lang: "", counter: 0, vals: [], role: "" },
        language_pairs: []
      }
    end

    # @return [Hash] :tu_count, :seg_count and :language_pairs of the file
    def stats
      imported_data
      { tu_count: @doc[:tu][:vals].length, seg_count: @doc[:seg][:vals].length, language_pairs: @doc[:language_pairs] }
    end

    # @return [Array] two-element array: translation units as
    #   [tu_id, creation_date] rows, and segments as
    #   [tu_id, role, word_count, language, text, creation_date] rows
    def import
      imported_data
      [@doc[:tu][:vals], @doc[:seg][:vals]]
    end

    private

    # Runs the import once; later calls reuse the already populated @doc.
    def imported_data
      @imported_data ||= import_data
    end

    # Loads every row of translation_units into @doc.
    def import_data
      with_database do |db|
        db.execute("Select * FROM translation_units").each { |row| import_row(row) }
      end
      @doc
    end

    # Yields an open SQLite3::Database for file_path and always closes it
    # (and removes any temporary download) afterwards.
    #
    # The path is handed to SQLite directly instead of going through
    # Kernel#open, which would execute a path starting with "|" as a shell
    # command and leave the opened File unclosed.
    #
    # @raise [Errno::ENOENT] when the local file does not exist (checked up
    #   front because SQLite would otherwise silently create an empty database)
    def with_database
      download = remote_download
      path = download ? download.path : file_path.to_s
      raise Errno::ENOENT, path unless File.exist?(path)

      db = SQLite3::Database.new(path)
      yield db
    ensure
      db.close if db
      download.close! if download
    end

    # Fetches file_path into a Tempfile when it is a remote URL.
    #
    # @return [Tempfile, nil] nil for local paths
    def remote_download
      return unless file_path.to_s =~ REMOTE_PATH_REGEX

      remote = URI.parse(file_path.to_s).open
      return remote if remote.respond_to?(:path)

      # open-uri hands back an in-memory StringIO for small bodies; SQLite
      # needs a real file, so spill it to disk.
      require 'tempfile'
      Tempfile.new(['sdltm_importer', '.sdltm']).tap do |local|
        local.binmode
        local.write(remote.read)
        local.flush
      end
    end

    # Records one translation unit plus its source and target segments.
    def import_row(row)
      tu = @doc[:tu]
      tu[:id] = [Array.new(4) { rand(10) }.join, Time.now.to_i, tu[:counter] += 1].join("-")
      tu[:creation_date] = iso_timestamp(row[CREATION_DATE_COLUMN])
      tu[:vals] << [tu[:id], tu[:creation_date]]

      import_segment(row, SOURCE_COLUMN, 'source')
      import_segment(row, TARGET_COLUMN, 'target')
    end

    # Parses the segment XML stored in +column+ of +row+ and appends it to
    # the segment list under the current translation unit.
    def import_segment(row, column, role)
      language = row[column].scan(TUV_LANG_REGEX).flatten[0]
      tags = create_tags(row[column].scan(TUV_TAGS_REGEX), row, column)
      segment_text = PrettyStrings::Cleaner.new(parse_segment_text(row, tags, column)).pretty
      record_language(role, language)

      seg = @doc[:seg]
      seg[:role] = role
      seg[:lang] = language
      seg[:vals] << [@doc[:tu][:id], role, word_count(segment_text), language, segment_text, @doc[:tu][:creation_date]]
    end

    # Remembers the language of the segment; once the target language is
    # known the (source, target) pair is added to the unique pair list.
    def record_language(role, language)
      if role == 'source'
        @doc[:source_language] = language
      else
        @doc[:target_language] = language
        @doc[:language_pairs] |= [[@doc[:source_language], @doc[:target_language]]]
      end
    end

    # Number of whitespace-separated tokens in +text+.
    def word_count(text)
      text.split.length
    end

    # Converts "YYYY-MM-DD HH:MM:SS" into "YYYYMMDDTHHMMSSZ".
    #
    # @return [String, nil] nil when the row has no creation date
    def iso_timestamp(timestamp)
      return if timestamp.nil?

      timestamp.delete(':-').sub(' ', 'T') + 'Z'
    end

    # Returns the text of the segment stored in column +i+ of +segment+.
    #
    # Without tags the text is the single <Value> of the segment. With tags
    # the contents attached to the tags are joined with single spaces (tags
    # without content are skipped).
    def parse_segment_text(segment, combined_tags, i)
      if combined_tags.nil? || combined_tags.empty?
        return segment[i].scan(TUV_TRANSLATION_REGEX).flatten[0]
      end

      first, *rest = combined_tags.map do |tag|
        tag.content[0] unless tag.content.nil? || tag.content.empty?
      end
      rest.compact.reduce(first || '') { |text, piece| text + ' ' + piece }
    end

    # Builds a Tag for every even-positioned <Tag> match (0, 2, 4, ...),
    # pairing each, in order, with the text runs matched by TUV_CONTENT_REGEX
    # in column +i+ of +segment+.
    #
    # @return [Array<Tag>] empty when the segment contains no tags
    def create_tags(tags, segment, i)
      return [] if tags.empty?

      selected = tags.each_slice(2).map(&:first)
      content = segment[i].scan(TUV_CONTENT_REGEX)
      selected.zip(content).map { |tag, text| Tag.new(tag, text) }
    end
  end
end
@@ -0,0 +1,3 @@
1
module SdltmImporter
  # Version of the gem; read by the gemspec. Frozen so the shared constant
  # cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'sdltm_importer/version'
5
+
6
# Gem packaging metadata for sdltm_importer.
Gem::Specification.new do |spec|
  spec.name          = "sdltm_importer"
  spec.version       = SdltmImporter::VERSION
  spec.authors       = ["Kevin S. Dias"]
  spec.email         = ["diasks2@gmail.com"]

  spec.summary       = %q{SDLTM file importer}
  spec.description   = %q{Import the content of a .sdltm translation memory file}
  spec.homepage      = "https://github.com/diasks2/sdltm_importer"
  # The README states the project is MIT licensed; declare it so the
  # packaged metadata no longer reports an empty license list.
  spec.license       = "MIT"

  # Package every file tracked by git.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.9"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"
  spec.add_runtime_dependency "pretty_strings", "~> 0.5.0"
  spec.add_runtime_dependency "sqlite3", "1.3.10"
end
Binary file
@@ -0,0 +1,65 @@
1
+ require 'spec_helper'
2
+
3
describe SdltmImporter do
  # Builds an importer for a fixture in spec/sample_test_files. The path is
  # resolved relative to this spec file so the suite passes regardless of the
  # directory rspec is started from.
  def sdltm_for(file_name)
    file_path = File.expand_path(File.join('sample_test_files', file_name), __dir__)
    SdltmImporter::Sdltm.new(file_path: file_path)
  end

  it 'has a version number' do
    expect(SdltmImporter::VERSION).not_to be nil
  end

  describe '#stats' do
    it 'reports the stats of a .sdltm file' do
      sdltm = sdltm_for('sample.sdltm')
      expect(sdltm.stats).to eq({:tu_count=>537, :seg_count=>1074, :language_pairs=>[["fr-FR", "en-US"]]})
    end

    it 'reports the stats of a .sdltm file 2' do
      sdltm = sdltm_for('sample_2.sdltm')
      expect(sdltm.stats).to eq({:tu_count=>102, :seg_count=>204, :language_pairs=>[["en-US", "de-DE"]]})
    end
  end

  describe '#import' do
    it 'imports a .sdltm file 1' do
      sdltm = sdltm_for('sample.sdltm')
      expect(sdltm.import[0].length).to eq(537)
    end

    it 'imports a .sdltm file 2' do
      sdltm = sdltm_for('sample.sdltm')
      expect(sdltm.import[1].length).to eq(1074)
    end

    it 'imports a .sdltm file 3' do
      sdltm = sdltm_for('sample.sdltm')
      expect(sdltm.import[1][-1][4]).to eq("Your website's URL")
    end

    it 'imports a .sdltm file 4' do
      sdltm = sdltm_for('sample_2.sdltm')
      expect(sdltm.import[0].length).to eq(102)
    end

    it 'imports a .sdltm file 5' do
      sdltm = sdltm_for('sample_2.sdltm')
      expect(sdltm.import[1].length).to eq(204)
    end

    # The last translation unit and the last segment share the same tu_id.
    it 'imports a .sdltm file 6' do
      sdltm = sdltm_for('sample_2.sdltm')
      expect(sdltm.import[0][-1][0]).to eq(sdltm.import[1][-1][0])
    end

    # The second translation unit owns the fourth segment (two segments per unit).
    it 'imports a .sdltm file 7' do
      sdltm = sdltm_for('sample_2.sdltm')
      expect(sdltm.import[0][1][0]).to eq(sdltm.import[1][3][0])
    end
  end
end
@@ -0,0 +1,2 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+ require 'sdltm_importer'
metadata ADDED
@@ -0,0 +1,130 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sdltm_importer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Kevin S. Dias
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-03-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.9'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pretty_strings
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.5.0
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.5.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: sqlite3
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '='
74
+ - !ruby/object:Gem::Version
75
+ version: 1.3.10
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '='
81
+ - !ruby/object:Gem::Version
82
+ version: 1.3.10
83
+ description: Import the content of a .sdltm translation memory file
84
+ email:
85
+ - diasks2@gmail.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - ".rspec"
92
+ - ".travis.yml"
93
+ - Gemfile
94
+ - README.md
95
+ - Rakefile
96
+ - lib/sdltm_importer.rb
97
+ - lib/sdltm_importer/version.rb
98
+ - sdltm_importer.gemspec
99
+ - spec/sample_test_files/sample.sdltm
100
+ - spec/sample_test_files/sample_2.sdltm
101
+ - spec/sdltm_importer_spec.rb
102
+ - spec/spec_helper.rb
103
+ homepage: https://github.com/diasks2/sdltm_importer
104
+ licenses: []
105
+ metadata: {}
106
+ post_install_message:
107
+ rdoc_options: []
108
+ require_paths:
109
+ - lib
110
+ required_ruby_version: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - ">="
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ required_rubygems_version: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - ">="
118
+ - !ruby/object:Gem::Version
119
+ version: '0'
120
+ requirements: []
121
+ rubyforge_project:
122
+ rubygems_version: 2.4.1
123
+ signing_key:
124
+ specification_version: 4
125
+ summary: SDLTM file importer
126
+ test_files:
127
+ - spec/sample_test_files/sample.sdltm
128
+ - spec/sample_test_files/sample_2.sdltm
129
+ - spec/sdltm_importer_spec.rb
130
+ - spec/spec_helper.rb