konjac 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +8 -0
- data/lib/konjac/tag.rb +17 -0
- data/lib/konjac/tag_manager.rb +50 -0
- data/lib/konjac/version.rb +1 -1
- data/lib/konjac/word.rb +31 -7
- data/lib/konjac.rb +2 -0
- data/spec/tag_spec.rb +47 -0
- metadata +20 -16
data/README.md
CHANGED
@@ -63,6 +63,14 @@ Use multiple dictionaries:
|
|
63
63
|
|
64
64
|
konjac translate financial_report_en.txt into japanese using finance
|
65
65
|
|
66
|
+
Extract text from a DOCX document (creates a plain-text `test.tags` file):
|
67
|
+
|
68
|
+
konjac extract test.docx
|
69
|
+
|
70
|
+
Import tags file back into DOCX document:
|
71
|
+
|
72
|
+
konjac import test.docx
|
73
|
+
|
66
74
|
Extended Example
|
67
75
|
----------------
|
68
76
|
|
data/lib/konjac/tag.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
module Konjac
  # A single entry of a tags file: a numeric index, the original text and an
  # optional translation of that text.
  class Tag
    attr_accessor :index, :original, :translated

    # index      - number shown inside the "[[KJ-n]]" marker
    # original   - the untranslated text
    # translated - the translated text, or nil when there is none
    def initialize(index, original, translated)
      @index = index
      @original = original
      @translated = translated
    end

    # Renders the entry as a "[[KJ-n]]" marker line followed by the original
    # text prefixed with "> ". The translation is appended on a third line
    # only when one is present.
    def to_s
      text = "[[KJ-#{index}]]\n> #{original}"
      text += "\n" + translated if translated?
      text
    end

    # Returns true when a translation has been set (anything other than nil
    # or false), and false otherwise.
    def translated?
      translated ? true : false
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module Konjac
  # Reads a tags file and exposes its entries as Tag objects.
  #
  # An entry consists of up to three consecutive lines:
  #
  #   [[KJ-<index>]]      marker line; the digits become the tag's index
  #   > <original text>   original text, introduced by "> "
  #   <translated text>   optional translation
  class TagManager
    attr_accessor :tags

    STARTS_WITH_CLOSE_TAG = /^\>/
    KONJAC_TAG = /^\[\[KJ\-(\d+)\]\]/

    # Parses the tags file at +path+ into @tags.
    def initialize(path)
      @tags = []
      parse_lines File.readlines(path)
    end

    # Appends one Tag to @tags for every entry found in +lines+ (an array of
    # strings, each optionally terminated by a newline).
    #
    # An entry is closed either by its translation line, by the next marker
    # line, or by the end of the input. In the latter two cases the tag is
    # stored with a nil translation. Lines that appear outside of any entry
    # are ignored.
    def parse_lines(lines)
      index = nil
      orig  = nil

      lines.each do |line|
        if (marker = KONJAC_TAG.match(line))
          # A new marker closes a still-open entry that has no translation.
          @tags << Tag.new(index, orig, nil) unless index.nil? || orig.nil?

          index = marker[1].to_i
          orig  = nil
        elsif line =~ STARTS_WITH_CLOSE_TAG
          # Drop the leading "> "; a bare ">" yields an empty original
          # instead of raising on nil.
          orig = line.chomp[2..-1] || ""
        elsif !index.nil?
          # Any other line inside an entry is its translation and closes it.
          @tags << Tag.new(index, orig, line.chomp)
          index = nil
          orig  = nil
        end
      end

      # Flush a trailing entry that reached the end of input untranslated.
      @tags << Tag.new(index, orig, nil) unless index.nil? || orig.nil?
    end

    # Returns every parsed tag, in file order.
    def all
      @tags
    end

    # Returns the tag at position +index+ within the parsed list (this is the
    # list position, not the number inside the "[[KJ-n]]" marker).
    def [](index)
      @tags[index]
    end
  end
end
|
data/lib/konjac/version.rb
CHANGED
data/lib/konjac/word.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# This really needs cleanup
|
2
|
+
|
1
3
|
module Konjac
|
2
4
|
module Word
|
3
5
|
class << self
|
@@ -11,20 +13,25 @@ module Konjac
|
|
11
13
|
basename = File.basename(sub_file, ".*")
|
12
14
|
new_path = "#{dirname}/#{basename}_imported.docx"
|
13
15
|
xml_path = "#{dirname}/#{basename}.xml"
|
14
|
-
tags_path = "#{dirname}/#{basename}.
|
16
|
+
tags_path = "#{dirname}/#{basename}.konjac"
|
15
17
|
out_path = "#{dirname}/word/document.xml"
|
16
18
|
|
17
19
|
# Open the original XML file and the updated tags
|
18
20
|
writer = Nokogiri::XML(File.read(xml_path))
|
19
|
-
|
21
|
+
nodes = writer.xpath("//w:t")
|
22
|
+
tags = TagManager.new(tags_path)
|
20
23
|
|
21
24
|
# Overwrite each <w:t> tag's content with the new tag
|
22
|
-
|
23
|
-
|
25
|
+
tags.all.each do |tag|
|
26
|
+
if tag.translated?
|
27
|
+
nodes[tag.index].content = tag.translated
|
28
|
+
end
|
24
29
|
end
|
25
30
|
|
26
31
|
# Create a directory for word/document.xml if necessary
|
27
|
-
|
32
|
+
unless File.directory?("#{dirname}/word")
|
33
|
+
FileUtils.mkdir "#{dirname}/word"
|
34
|
+
end
|
28
35
|
|
29
36
|
# Write the modified XML to a file
|
30
37
|
File.open(out_path, "w") do |file|
|
@@ -50,7 +57,7 @@ module Konjac
|
|
50
57
|
basename = File.basename(sub_file, ".*")
|
51
58
|
xml_path = "#{dirname}/#{basename}_orig.xml"
|
52
59
|
clean_path = "#{dirname}/#{basename}.xml"
|
53
|
-
tags_path = "#{dirname}/#{basename}.
|
60
|
+
tags_path = "#{dirname}/#{basename}.konjac"
|
54
61
|
|
55
62
|
# Unzip the DOCX's word/document.xml file and pipe the output into
|
56
63
|
# an XML with the same base name as the DOCX
|
@@ -79,11 +86,16 @@ module Konjac
|
|
79
86
|
prev = node
|
80
87
|
end
|
81
88
|
|
89
|
+
# Write the tags file
|
90
|
+
index = 0
|
82
91
|
cleaner.xpath("//w:t").each do |node|
|
83
|
-
tags_file.puts node
|
92
|
+
tags_file.puts "[[KJ-%i]]%s" % [index, additional_info(node)]
|
93
|
+
tags_file.puts "> %s" % node.content
|
94
|
+
index += 1
|
84
95
|
end
|
85
96
|
end
|
86
97
|
|
98
|
+
# Write the cleaned-up XML to a file for inspection
|
87
99
|
File.open(clean_path, "w") do |xml|
|
88
100
|
xml.puts cleaner.to_xml
|
89
101
|
end
|
@@ -148,6 +160,18 @@ module Konjac
|
|
148
160
|
hash.delete :t
|
149
161
|
hash[:rPr][:rFonts][:attributes].delete :hint
|
150
162
|
end
|
163
|
+
|
164
|
+
# Builds the context suffix written after a node's "[[KJ-n]]" marker in the
# tags file. Returns " #=> hyperlink" when the node's grandparent element is
# named "hyperlink", and an empty string otherwise.
def additional_info(node)
  labels = []
  labels.push("hyperlink") if node.parent.parent.name == "hyperlink"
  return "" if labels.empty?

  " #=> #{labels.join(", ")}"
end
|
151
175
|
end
|
152
176
|
end
|
153
177
|
end
|
data/lib/konjac.rb
CHANGED
@@ -8,6 +8,8 @@ module Konjac
|
|
8
8
|
autoload :CLI, "konjac/cli"
|
9
9
|
autoload :Dictionary, "konjac/dictionary"
|
10
10
|
autoload :Language, "konjac/language"
|
11
|
+
autoload :Tag, "konjac/tag"
|
12
|
+
autoload :TagManager, "konjac/tag_manager"
|
11
13
|
autoload :Translator, "konjac/translator"
|
12
14
|
autoload :Utils, "konjac/utils"
|
13
15
|
autoload :Word, "konjac/word"
|
data/spec/tag_spec.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# coding: utf-8
require File.dirname(__FILE__) + "/spec_helper"
require "tempfile"

# Checks that TagManager turns a tags file into the expected list of tags:
# translated entries, an entry without a translation, and non-consecutive
# index numbers.
describe Tag do
  before :each do
    # Leading whitespace is stripped from every line, so the heredoc can be
    # indented here without affecting the file contents.
    @tags_file = Tempfile.new(["tags", ".tags"])
    @tags_file.write <<-eos.gsub(/^\s+/, "")
      [[KJ-1]]
      > 犬
      dog
      [[KJ-2]]
      > 何ですか。
      What is it?
      [[KJ-3]]
      > 空白
      [[KJ-6]]
      > 以上
      -- end --
    eos
    # Rewind before handing the path over; TagManager re-opens the file by
    # path rather than using this handle.
    @tags_file.rewind
    @manager = TagManager.new(@tags_file.path)
  end

  # Release the temporary file after every example instead of leaving it
  # open until garbage collection.
  after :each do
    @tags_file.close
    @tags_file.unlink
  end

  it "should accurately read a tag file" do
    @manager.all.should_not == nil
    @manager[0].index.should == 1
    @manager[0].original.should == "犬"
    @manager[0].translated.should == "dog"
  end

  it "should succeed reading multiple lines" do
    @manager[1].index.should == 2
    @manager[1].original.should == "何ですか。"
    @manager[1].translated.should == "What is it?"
  end

  it "should ignore blank translations" do
    @manager[2].index.should == 3
    @manager[2].original.should == "空白"
    @manager[2].translated.should == nil
  end

  it "should skip over blank indexes" do
    @manager[3].index.should == 6
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: konjac
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-01-
|
12
|
+
date: 2012-01-13 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &70334829927100 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70334829927100
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: bundler
|
27
|
-
requirement: &
|
27
|
+
requirement: &70334829926260 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70334829926260
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rspec
|
38
|
-
requirement: &
|
38
|
+
requirement: &70334829925520 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70334829925520
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: sdoc
|
49
|
-
requirement: &
|
49
|
+
requirement: &70334829925000 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70334829925000
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: autotest
|
60
|
-
requirement: &
|
60
|
+
requirement: &70334829924120 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: '0'
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70334829924120
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: autotest-fsevent
|
71
|
-
requirement: &
|
71
|
+
requirement: &70334829908620 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: '0'
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *70334829908620
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: autotest-growl
|
82
|
-
requirement: &
|
82
|
+
requirement: &70334829907660 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,7 +87,7 @@ dependencies:
|
|
87
87
|
version: '0'
|
88
88
|
type: :development
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *70334829907660
|
91
91
|
description: A Ruby command-line utility for translating files using a YAML wordlist
|
92
92
|
email:
|
93
93
|
- bryan.mckelvey@gmail.com
|
@@ -110,6 +110,8 @@ files:
|
|
110
110
|
- lib/konjac/dictionary.rb
|
111
111
|
- lib/konjac/exception.rb
|
112
112
|
- lib/konjac/language.rb
|
113
|
+
- lib/konjac/tag.rb
|
114
|
+
- lib/konjac/tag_manager.rb
|
113
115
|
- lib/konjac/translator.rb
|
114
116
|
- lib/konjac/utils.rb
|
115
117
|
- lib/konjac/version.rb
|
@@ -118,6 +120,7 @@ files:
|
|
118
120
|
- spec/dictionary_spec.rb
|
119
121
|
- spec/language_spec.rb
|
120
122
|
- spec/spec_helper.rb
|
123
|
+
- spec/tag_spec.rb
|
121
124
|
- spec/utils_spec.rb
|
122
125
|
homepage: http://brymck.herokuapp.com/
|
123
126
|
licenses: []
|
@@ -148,4 +151,5 @@ test_files:
|
|
148
151
|
- spec/dictionary_spec.rb
|
149
152
|
- spec/language_spec.rb
|
150
153
|
- spec/spec_helper.rb
|
154
|
+
- spec/tag_spec.rb
|
151
155
|
- spec/utils_spec.rb
|