konjac 0.0.5 → 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/konjac.gemspec +1 -1
- data/lib/konjac/cli.rb +4 -0
- data/lib/konjac/utils.rb +1 -1
- data/lib/konjac/version.rb +1 -1
- data/lib/konjac/word.rb +71 -0
- data/lib/konjac.rb +2 -0
- metadata +25 -13
data/konjac.gemspec
CHANGED
@@ -18,7 +18,7 @@ Gem::Specification.new do |s|
|
|
18
18
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
19
19
|
s.require_paths = ["lib"]
|
20
20
|
|
21
|
-
|
21
|
+
s.add_runtime_dependency "nokogiri"
|
22
22
|
s.add_development_dependency "bundler"
|
23
23
|
s.add_development_dependency "rspec"
|
24
24
|
s.add_development_dependency "sdoc"
|
data/lib/konjac/cli.rb
CHANGED
data/lib/konjac/utils.rb
CHANGED
data/lib/konjac/version.rb
CHANGED
data/lib/konjac/word.rb
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
module Konjac
|
2
|
+
module Word
|
3
|
+
class << self
|
4
|
+
# Imports the text content of a tags file into a Microsoft Word 2003+ Document
|
5
|
+
def import_docx_tags(files)
|
6
|
+
files.each do |file|
|
7
|
+
sub_files = Dir.glob(File.expand_path(file))
|
8
|
+
sub_files.each do |sub_file|
|
9
|
+
# Build the list of paths we need to work with
|
10
|
+
dirname = File.dirname(sub_file)
|
11
|
+
basename = File.basename(sub_file, ".*")
|
12
|
+
new_path = "#{dirname}/#{basename}_imported.docx"
|
13
|
+
xml_path = "#{dirname}/#{basename}.xml"
|
14
|
+
tags_path = "#{dirname}/#{basename}.tags"
|
15
|
+
out_path = "#{dirname}/word/document.xml"
|
16
|
+
|
17
|
+
# Open the original XML file and the updated tags
|
18
|
+
writer = Nokogiri::XML(File.read(xml_path))
|
19
|
+
tags = File.readlines(tags_path)
|
20
|
+
|
21
|
+
# Overwrite each <w:t> tag's content with the new tag
|
22
|
+
writer.xpath("//w:t").each do |node|
|
23
|
+
node.content = tags.shift.tr("\n", "")
|
24
|
+
end
|
25
|
+
|
26
|
+
# Create a directory for word/document.xml if necessary
|
27
|
+
FileUtils.mkdir "#{dirname}/word" unless File.directory?("#{dirname}/word")
|
28
|
+
|
29
|
+
# Write the modified XML to a file
|
30
|
+
File.open(out_path, "w") do |file|
|
31
|
+
file.write writer.to_xml.gsub(/\n\s*/, "").sub(/\?></, "?>\n<")
|
32
|
+
end
|
33
|
+
|
34
|
+
# Copy the original file
|
35
|
+
FileUtils.cp sub_file, new_path
|
36
|
+
|
37
|
+
# Add the new document XML to the copied file
|
38
|
+
system "cd #{dirname} && zip -q #{new_path} word/document.xml"
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# Extracts the text content from a Microsoft Word 2003+ Document
|
44
|
+
def extract_docx_tags(files)
|
45
|
+
files.each do |file|
|
46
|
+
sub_files = Dir.glob(File.expand_path(file))
|
47
|
+
sub_files.each do |sub_file|
|
48
|
+
# Build a list of all the paths we're working with
|
49
|
+
dirname = File.dirname(sub_file)
|
50
|
+
basename = File.basename(sub_file, ".*")
|
51
|
+
xml_path = "#{dirname}/#{basename}.xml"
|
52
|
+
tags_path = "#{dirname}/#{basename}.tags"
|
53
|
+
|
54
|
+
# Unzip the DOCX's word/document.xml file and pipe the output into
|
55
|
+
# an XML with the same base name as the DOCX
|
56
|
+
system "unzip -p #{sub_file} word/document.xml > #{xml_path}"
|
57
|
+
|
58
|
+
# Read in the XML file and extract the content from each <w:t> tag
|
59
|
+
reader = Nokogiri::XML(File.read(xml_path))
|
60
|
+
File.open(tags_path, "w") do |tags_file|
|
61
|
+
reader.xpath("//w:t").each do |node|
|
62
|
+
tags_file.puts node.content
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
# lol
|
data/lib/konjac.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require "konjac/version"
|
2
2
|
require "konjac/exception"
|
3
3
|
autoload :FileUtils, "fileutils"
|
4
|
+
autoload :Nokogiri, "nokogiri"
|
4
5
|
|
5
6
|
module Konjac
|
6
7
|
# Set up autoload for all modules
|
@@ -9,4 +10,5 @@ module Konjac
|
|
9
10
|
autoload :Language, "konjac/language"
|
10
11
|
autoload :Translator, "konjac/translator"
|
11
12
|
autoload :Utils, "konjac/utils"
|
13
|
+
autoload :Word, "konjac/word"
|
12
14
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: konjac
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: '0.1'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,9 +11,20 @@ bindir: bin
|
|
11
11
|
cert_chain: []
|
12
12
|
date: 2012-01-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: &70261374910800 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70261374910800
|
14
25
|
- !ruby/object:Gem::Dependency
|
15
26
|
name: bundler
|
16
|
-
requirement: &
|
27
|
+
requirement: &70261374908420 !ruby/object:Gem::Requirement
|
17
28
|
none: false
|
18
29
|
requirements:
|
19
30
|
- - ! '>='
|
@@ -21,10 +32,10 @@ dependencies:
|
|
21
32
|
version: '0'
|
22
33
|
type: :development
|
23
34
|
prerelease: false
|
24
|
-
version_requirements: *
|
35
|
+
version_requirements: *70261374908420
|
25
36
|
- !ruby/object:Gem::Dependency
|
26
37
|
name: rspec
|
27
|
-
requirement: &
|
38
|
+
requirement: &70261374907000 !ruby/object:Gem::Requirement
|
28
39
|
none: false
|
29
40
|
requirements:
|
30
41
|
- - ! '>='
|
@@ -32,10 +43,10 @@ dependencies:
|
|
32
43
|
version: '0'
|
33
44
|
type: :development
|
34
45
|
prerelease: false
|
35
|
-
version_requirements: *
|
46
|
+
version_requirements: *70261374907000
|
36
47
|
- !ruby/object:Gem::Dependency
|
37
48
|
name: sdoc
|
38
|
-
requirement: &
|
49
|
+
requirement: &70261374906120 !ruby/object:Gem::Requirement
|
39
50
|
none: false
|
40
51
|
requirements:
|
41
52
|
- - ! '>='
|
@@ -43,10 +54,10 @@ dependencies:
|
|
43
54
|
version: '0'
|
44
55
|
type: :development
|
45
56
|
prerelease: false
|
46
|
-
version_requirements: *
|
57
|
+
version_requirements: *70261374906120
|
47
58
|
- !ruby/object:Gem::Dependency
|
48
59
|
name: autotest
|
49
|
-
requirement: &
|
60
|
+
requirement: &70261374905300 !ruby/object:Gem::Requirement
|
50
61
|
none: false
|
51
62
|
requirements:
|
52
63
|
- - ! '>='
|
@@ -54,10 +65,10 @@ dependencies:
|
|
54
65
|
version: '0'
|
55
66
|
type: :development
|
56
67
|
prerelease: false
|
57
|
-
version_requirements: *
|
68
|
+
version_requirements: *70261374905300
|
58
69
|
- !ruby/object:Gem::Dependency
|
59
70
|
name: autotest-fsevent
|
60
|
-
requirement: &
|
71
|
+
requirement: &70261374904580 !ruby/object:Gem::Requirement
|
61
72
|
none: false
|
62
73
|
requirements:
|
63
74
|
- - ! '>='
|
@@ -65,10 +76,10 @@ dependencies:
|
|
65
76
|
version: '0'
|
66
77
|
type: :development
|
67
78
|
prerelease: false
|
68
|
-
version_requirements: *
|
79
|
+
version_requirements: *70261374904580
|
69
80
|
- !ruby/object:Gem::Dependency
|
70
81
|
name: autotest-growl
|
71
|
-
requirement: &
|
82
|
+
requirement: &70261374903400 !ruby/object:Gem::Requirement
|
72
83
|
none: false
|
73
84
|
requirements:
|
74
85
|
- - ! '>='
|
@@ -76,7 +87,7 @@ dependencies:
|
|
76
87
|
version: '0'
|
77
88
|
type: :development
|
78
89
|
prerelease: false
|
79
|
-
version_requirements: *
|
90
|
+
version_requirements: *70261374903400
|
80
91
|
description: A Ruby command-line utility for translating files using a YAML wordlist
|
81
92
|
email:
|
82
93
|
- bryan.mckelvey@gmail.com
|
@@ -102,6 +113,7 @@ files:
|
|
102
113
|
- lib/konjac/translator.rb
|
103
114
|
- lib/konjac/utils.rb
|
104
115
|
- lib/konjac/version.rb
|
116
|
+
- lib/konjac/word.rb
|
105
117
|
- spec/cli_spec.rb
|
106
118
|
- spec/dictionary_spec.rb
|
107
119
|
- spec/language_spec.rb
|