konjac 0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/konjac/version.rb +1 -1
- data/lib/konjac/word.rb +89 -7
- metadata +15 -15
data/lib/konjac/version.rb
CHANGED
data/lib/konjac/word.rb
CHANGED
@@ -46,26 +46,108 @@ module Konjac
|
|
46
46
|
sub_files = Dir.glob(File.expand_path(file))
|
47
47
|
sub_files.each do |sub_file|
|
48
48
|
# Build a list of all the paths we're working with
|
49
|
-
dirname
|
50
|
-
basename
|
51
|
-
xml_path
|
52
|
-
|
49
|
+
dirname = File.dirname(sub_file)
|
50
|
+
basename = File.basename(sub_file, ".*")
|
51
|
+
xml_path = "#{dirname}/#{basename}_orig.xml"
|
52
|
+
clean_path = "#{dirname}/#{basename}.xml"
|
53
|
+
tags_path = "#{dirname}/#{basename}.tags"
|
53
54
|
|
54
55
|
# Unzip the DOCX's word/document.xml file and pipe the output into
|
55
56
|
# an XML with the same base name as the DOCX
|
56
57
|
system "unzip -p #{sub_file} word/document.xml > #{xml_path}"
|
57
58
|
|
58
59
|
# Read in the XML file and extract the content from each <w:t> tag
|
59
|
-
|
60
|
+
cleaner = Nokogiri::XML(File.read(xml_path))
|
60
61
|
File.open(tags_path, "w") do |tags_file|
|
61
|
-
|
62
|
+
# Remove all grammar and spellcheck tags
|
63
|
+
cleaner.xpath("//w:proofErr").remove
|
64
|
+
|
65
|
+
nodes = cleaner.xpath("//w:r")
|
66
|
+
prev = nil
|
67
|
+
nodes.each do |node|
|
68
|
+
unless prev.nil?
|
69
|
+
if (prev.next_sibling == node) && compare_nodes(prev, node)
|
70
|
+
begin
|
71
|
+
node.at_xpath("w:t").content = prev.at_xpath("w:t").content +
|
72
|
+
node.at_xpath("w:t").content
|
73
|
+
prev.remove
|
74
|
+
rescue
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
prev = node
|
80
|
+
end
|
81
|
+
|
82
|
+
cleaner.xpath("//w:t").each do |node|
|
62
83
|
tags_file.puts node.content
|
63
84
|
end
|
64
85
|
end
|
86
|
+
|
87
|
+
File.open(clean_path, "w") do |xml|
|
88
|
+
xml.puts cleaner.to_xml
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
private
|
95
|
+
|
96
|
+
# Performs a comparison between two nodes and accepts them as equivalent
|
97
|
+
# if the differences are very minor
|
98
|
+
def compare_nodes(a, b)
|
99
|
+
c = clean_hash(xml_node_to_hash(a))
|
100
|
+
d = clean_hash(xml_node_to_hash(b))
|
101
|
+
c == d
|
102
|
+
end
|
103
|
+
|
104
|
+
def xml_node_to_hash(node)
|
105
|
+
# If we are at the root of the document, start the hash
|
106
|
+
if node.element?
|
107
|
+
result_hash = {}
|
108
|
+
if node.attributes != {}
|
109
|
+
result_hash[:attributes] = {}
|
110
|
+
node.attributes.keys.each do |key|
|
111
|
+
result_hash[:attributes][node.attributes[key].name.to_sym] = prepare(node.attributes[key].value)
|
112
|
+
end
|
113
|
+
end
|
114
|
+
if node.children.size > 0
|
115
|
+
node.children.each do |child|
|
116
|
+
result = xml_node_to_hash(child)
|
117
|
+
|
118
|
+
if child.name == "text"
|
119
|
+
unless child.next_sibling || child.previous_sibling
|
120
|
+
return prepare(result)
|
121
|
+
end
|
122
|
+
elsif result_hash[child.name.to_sym]
|
123
|
+
if result_hash[child.name.to_sym].is_a?(Array)
|
124
|
+
result_hash[child.name.to_sym] << prepare(result)
|
125
|
+
else
|
126
|
+
result_hash[child.name.to_sym] = [result_hash[child.name.to_sym]] << prepare(result)
|
127
|
+
end
|
128
|
+
else
|
129
|
+
result_hash[child.name.to_sym] = prepare(result)
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
return result_hash
|
134
|
+
else
|
135
|
+
return result_hash
|
65
136
|
end
|
137
|
+
else
|
138
|
+
return prepare(node.content.to_s)
|
66
139
|
end
|
67
140
|
end
|
141
|
+
|
142
|
+
def prepare(data)
|
143
|
+
(data.class == String && data.to_i.to_s == data) ? data.to_i : data
|
144
|
+
end
|
145
|
+
|
146
|
+
# Delete extraneous attributes for comparison
|
147
|
+
def clean_hash(hash)
|
148
|
+
hash.delete :t
|
149
|
+
hash[:rPr][:rFonts][:attributes].delete :hint
|
150
|
+
end
|
68
151
|
end
|
69
152
|
end
|
70
153
|
end
|
71
|
-
# lol
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: konjac
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2012-01-12 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &70314541350300 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70314541350300
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: bundler
|
27
|
-
requirement: &
|
27
|
+
requirement: &70314541347900 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70314541347900
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rspec
|
38
|
-
requirement: &
|
38
|
+
requirement: &70314541345520 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70314541345520
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: sdoc
|
49
|
-
requirement: &
|
49
|
+
requirement: &70314541340780 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70314541340780
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: autotest
|
60
|
-
requirement: &
|
60
|
+
requirement: &70314541332480 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: '0'
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70314541332480
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: autotest-fsevent
|
71
|
-
requirement: &
|
71
|
+
requirement: &70314541323420 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: '0'
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *70314541323420
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: autotest-growl
|
82
|
-
requirement: &
|
82
|
+
requirement: &70314541318360 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,7 +87,7 @@ dependencies:
|
|
87
87
|
version: '0'
|
88
88
|
type: :development
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *70314541318360
|
91
91
|
description: A Ruby command-line utility for translating files using a YAML wordlist
|
92
92
|
email:
|
93
93
|
- bryan.mckelvey@gmail.com
|