konjac 0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. data/lib/konjac/version.rb +1 -1
  2. data/lib/konjac/word.rb +89 -7
  3. metadata +15 -15
data/lib/konjac/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Konjac
2
- VERSION = "0.1"
2
+ VERSION = "0.1.1"
3
3
  end
data/lib/konjac/word.rb CHANGED
@@ -46,26 +46,108 @@ module Konjac
46
46
  sub_files = Dir.glob(File.expand_path(file))
47
47
  sub_files.each do |sub_file|
48
48
  # Build a list of all the paths we're working with
49
- dirname = File.dirname(sub_file)
50
- basename = File.basename(sub_file, ".*")
51
- xml_path = "#{dirname}/#{basename}.xml"
52
- tags_path = "#{dirname}/#{basename}.tags"
49
+ dirname = File.dirname(sub_file)
50
+ basename = File.basename(sub_file, ".*")
51
+ xml_path = "#{dirname}/#{basename}_orig.xml"
52
+ clean_path = "#{dirname}/#{basename}.xml"
53
+ tags_path = "#{dirname}/#{basename}.tags"
53
54
 
54
55
  # Unzip the DOCX's word/document.xml file and pipe the output into
55
56
  # an XML with the same base name as the DOCX
56
57
  system "unzip -p #{sub_file} word/document.xml > #{xml_path}"
57
58
 
58
59
  # Read in the XML file and extract the content from each <w:t> tag
59
- reader = Nokogiri::XML(File.read(xml_path))
60
+ cleaner = Nokogiri::XML(File.read(xml_path))
60
61
  File.open(tags_path, "w") do |tags_file|
61
- reader.xpath("//w:t").each do |node|
62
+ # Remove all grammar and spellcheck tags
63
+ cleaner.xpath("//w:proofErr").remove
64
+
65
+ nodes = cleaner.xpath("//w:r")
66
+ prev = nil
67
+ nodes.each do |node|
68
+ unless prev.nil?
69
+ if (prev.next_sibling == node) && compare_nodes(prev, node)
70
+ begin
71
+ node.at_xpath("w:t").content = prev.at_xpath("w:t").content +
72
+ node.at_xpath("w:t").content
73
+ prev.remove
74
+ rescue
75
+ end
76
+ end
77
+ end
78
+
79
+ prev = node
80
+ end
81
+
82
+ cleaner.xpath("//w:t").each do |node|
62
83
  tags_file.puts node.content
63
84
  end
64
85
  end
86
+
87
+ File.open(clean_path, "w") do |xml|
88
+ xml.puts cleaner.to_xml
89
+ end
90
+ end
91
+ end
92
+ end
93
+
94
+ private
95
+
96
+ # Performs a comparison between two nodes and accepts them as equivalent
97
+ # if the differences are very minor
98
+ def compare_nodes(a, b)
99
+ c = clean_hash(xml_node_to_hash(a))
100
+ d = clean_hash(xml_node_to_hash(b))
101
+ c == d
102
+ end
103
+
104
+ def xml_node_to_hash(node)
105
+ # If we are at the root of the document, start the hash
106
+ if node.element?
107
+ result_hash = {}
108
+ if node.attributes != {}
109
+ result_hash[:attributes] = {}
110
+ node.attributes.keys.each do |key|
111
+ result_hash[:attributes][node.attributes[key].name.to_sym] = prepare(node.attributes[key].value)
112
+ end
113
+ end
114
+ if node.children.size > 0
115
+ node.children.each do |child|
116
+ result = xml_node_to_hash(child)
117
+
118
+ if child.name == "text"
119
+ unless child.next_sibling || child.previous_sibling
120
+ return prepare(result)
121
+ end
122
+ elsif result_hash[child.name.to_sym]
123
+ if result_hash[child.name.to_sym].is_a?(Array)
124
+ result_hash[child.name.to_sym] << prepare(result)
125
+ else
126
+ result_hash[child.name.to_sym] = [result_hash[child.name.to_sym]] << prepare(result)
127
+ end
128
+ else
129
+ result_hash[child.name.to_sym] = prepare(result)
130
+ end
131
+ end
132
+
133
+ return result_hash
134
+ else
135
+ return result_hash
65
136
  end
137
+ else
138
+ return prepare(node.content.to_s)
66
139
  end
67
140
  end
141
+
142
+ def prepare(data)
143
+ (data.class == String && data.to_i.to_s == data) ? data.to_i : data
144
+ end
145
+
146
+ # Delete extraneous attributes for comparison
147
+ def clean_hash(hash)
148
+ hash.delete :t
149
+ hash[:rPr][:rFonts][:attributes].delete :hint
150
+ end
68
151
  end
69
152
  end
70
153
  end
71
- # lol
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: konjac
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.1'
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2012-01-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &70261374910800 !ruby/object:Gem::Requirement
16
+ requirement: &70314541350300 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70261374910800
24
+ version_requirements: *70314541350300
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: bundler
27
- requirement: &70261374908420 !ruby/object:Gem::Requirement
27
+ requirement: &70314541347900 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *70261374908420
35
+ version_requirements: *70314541347900
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: rspec
38
- requirement: &70261374907000 !ruby/object:Gem::Requirement
38
+ requirement: &70314541345520 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *70261374907000
46
+ version_requirements: *70314541345520
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: sdoc
49
- requirement: &70261374906120 !ruby/object:Gem::Requirement
49
+ requirement: &70314541340780 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *70261374906120
57
+ version_requirements: *70314541340780
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: autotest
60
- requirement: &70261374905300 !ruby/object:Gem::Requirement
60
+ requirement: &70314541332480 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '0'
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *70261374905300
68
+ version_requirements: *70314541332480
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: autotest-fsevent
71
- requirement: &70261374904580 !ruby/object:Gem::Requirement
71
+ requirement: &70314541323420 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: '0'
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *70261374904580
79
+ version_requirements: *70314541323420
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: autotest-growl
82
- requirement: &70261374903400 !ruby/object:Gem::Requirement
82
+ requirement: &70314541318360 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,7 +87,7 @@ dependencies:
87
87
  version: '0'
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *70261374903400
90
+ version_requirements: *70314541318360
91
91
  description: A Ruby command-line utility for translating files using a YAML wordlist
92
92
  email:
93
93
  - bryan.mckelvey@gmail.com