universal_excel_parser 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/universal_excel_parser.rb +221 -0
- metadata +57 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a042c3feaaebe5d532e85ccb7b6cfd9ff6d787dc
|
4
|
+
data.tar.gz: e46cb8657a18bc2e93581e8e2d48565685fa62aa
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2b99cff666a7d8ba425f3e018d58048b82df2bd533be7c8af70d7585b92cd095c45c2f8d2ce2bf13d938e428500d041643b6412e3a6defadf3516c698f5f6258
|
7
|
+
data.tar.gz: 5c3c75374cec78681f9a9cb0efd0b57d1b2158143952f356d3ee27afd72023ec27cfdd310acd02dc0712c1763ab5ecd6537be980c74a0be74f397572c9fca685
|
@@ -0,0 +1,221 @@
|
|
1
|
+
require 'simple_xlsx_reader'
|
2
|
+
require 'logger'
|
3
|
+
# Hash-driven reader for XLSX workbooks.
#
# A caller describes, per logical record type, how to recognise a sheet (a
# cell that must hold a given content) and which cells to extract from it.
# The parser validates the file, loads it through SimpleXlsxReader and
# returns the extracted values grouped by the caller-chosen keys.
#
# Sheet, row and column numbers are zero-based array indexes into
# +xls_file.sheets+ and +sheet.rows+.
class UniversalExcelParser
  # MIME types (as reported by the +file+ utility) that are accepted as
  # potentially readable XLSX containers.
  READABLE_MIME_TYPES = [
    "application/zip",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
  ].freeze

  attr_accessor :xls_file, :file_path, :data_hash, :validator_hashes, :logger

  # @param p_file_path [String] path of the workbook to parse
  # @param p_data_hash [Hash] extraction definition (see
  #   {#read_hash_with_sheet_identification})
  # @param p_validator_hashes [Hash] validator definitions (see {#validate_content})
  # @param p_logger_level [Integer] severity threshold for the STDOUT logger
  def initialize(p_file_path, p_data_hash, p_validator_hashes, p_logger_level = Logger::ERROR)
    self.file_path = p_file_path
    self.data_hash = p_data_hash
    self.validator_hashes = p_validator_hashes
    self.logger = Logger.new(STDOUT)
    logger.level = p_logger_level
  end

  # Convenience entry point: validates the file type, loads the workbook,
  # checks that every sheet is covered by +data_hash+ and extracts the data.
  #
  # @param file_path [String] path of the workbook
  # @param data_hash [Hash] extraction definition
  # @param validator_hashes [Hash] stored on the parser instance
  # @return [Hash, false] extracted data, or false as soon as one step fails
  def self.parse_hash_with_sheet_identification(file_path, data_hash, validator_hashes = {})
    uep = new(file_path, data_hash, validator_hashes)
    uep.logger.info "READING FILE #{file_path}"
    return false unless uep.validate_file_type(uep.file_path)
    return false unless uep.read_file(uep.file_path)
    return false unless uep.validate_readability_of_all_sheets_with_identification(data_hash)

    data = uep.read_hash_with_sheet_identification(data_hash)
    uep.logger.info "READING FILE #{file_path}: FINISHED"
    data
  end

  # Extracts the configured cells from every sheet that matches a definition.
  #
  # HASH SYNTAX:
  #   { :CUSTOM_NAME => {
  #       :sheet_identification => { :row => 123, :column => 123, :content => "TEXT" },
  #       :data => { :KEY => [ROW, COLUMN] } } }
  #
  # Each sheet is consumed by the first definition whose identification cell
  # matches. A data cell that does not exist is logged and stored as nil
  # instead of being dereferenced.
  #
  # @param input [Hash] extraction definition
  # @return [Hash{Object => Array<Hash>}, false] one record per matched sheet,
  #   grouped by definition key; false when +input+ is not a Hash
  def read_hash_with_sheet_identification(input)
    logger.debug "[+] READING THE FILES WITH SHEET IDENTIFICATION"
    unless input.is_a?(Hash)
      logger.error " [ERROR] INPUT IS NOT AN HASH"
      return false
    end

    output_data = {}
    sheet_ids = (0...xls_file.sheets.count).to_a
    input.each do |key, value|
      sheet_ids.delete_if do |sid|
        next false unless identified_sheet?(sid, value[:sheet_identification])

        record = {}
        value[:data].each do |data_key, data_value|
          row = data_value[0]
          column = data_value[1]
          if field_exists?(sid, row, column)
            record[data_key] = xls_file.sheets[sid].rows[row][column]
          else
            logger.error "FIELD DOES NOT EXISTS [#{sid}][#{row}][#{column}]"
            record[data_key] = nil
          end
        end
        (output_data[key] ||= []) << record
        true
      end
    end
    logger.debug " [ERROR] SHEETS THAT NOT READ: #{sheet_ids}"
    output_data
  end

  # Checks that every sheet of the workbook is matched by some definition in
  # +input+ and that all data cells of the matched definitions exist.
  #
  # @param input [Hash] extraction definition (same syntax as
  #   {#read_hash_with_sheet_identification})
  # @return [Boolean] true only when no sheet is left unmatched and no data
  #   cell is missing
  def validate_readability_of_all_sheets_with_identification(input)
    logger.debug "[+] TRYING TO READ ALL SHEETS"
    unless input.is_a?(Hash)
      logger.error " [ERROR] INPUT IS NOT AN HASH"
      return false
    end

    sheet_ids = (0...xls_file.sheets.count).to_a
    input.each do |_key, value|
      sheet_ids.delete_if do |sid|
        next false unless identified_sheet?(sid, value[:sheet_identification])

        value[:data].each do |_data_key, data_value|
          return false unless field_exists?(sid, data_value[0], data_value[1])
        end
        true
      end
    end

    unless sheet_ids.empty?
      logger.error "I CANT READ ALL SHEETS. NOT READABLE SHEETS:"
      sheet_ids.each { |sid| logger.error " - #{xls_file.sheets[sid].name}" }
      return false
    end
    true
  end

  # @return [Boolean] whether the loaded workbook contains at least one sheet
  def file_has_sheets?
    sheet_total = xls_file.sheets.count
    if sheet_total != 0
      logger.debug " [+] FILE HAS #{sheet_total} SHEETS"
      true
    else
      logger.error " [ERROR] FILE HAS NO SHEETS"
      false
    end
  end

  # @param count [Integer] expected number of sheets
  # @return [Boolean] whether the workbook has exactly +count+ sheets
  def validate_sheet_count(count)
    actual = xls_file.sheets.count
    if actual == count
      logger.debug " [+] THE FILE HAS THE CORRECT COUNT OF SHEETS(#{count}/#{actual})"
      true
    else
      logger.error " [ERROR] THE FILE HAS A WRONG COUNT OF SHEETS(#{count}/#{actual})"
      false
    end
  end

  # @param sheet [Integer] sheet index
  # @param row [Integer] row index
  # @param column [Integer] column index
  # @return [Boolean] whether the addressed cell lies inside the sheet
  def field_exists?(sheet, row, column)
    return false unless row_exists?(sheet, row)

    # Check the column argument itself (not Object#hash) so that a
    # non-integer column is rejected instead of raising.
    unless column.is_a?(Integer)
      logger.error " [ERROR] COLUMN NUMBER IS NOT AN INTEGER!"
      return false
    end
    if xls_file.sheets[sheet].rows[row].count <= column
      logger.error " [ERROR] COLUMN NUMBER IS HIGHER THAN THE ROWS IN THE FILE!"
      return false
    end
    true
  end

  # @param sheet [Integer] sheet index
  # @param row [Integer] row index
  # @return [Boolean] whether the sheet exists and has a row at that index
  def row_exists?(sheet, row)
    return false unless sheet_exists?(sheet)

    unless row.is_a?(Integer)
      logger.error " [ERROR] ROW IS NOT AN INTEGER!"
      return false
    end
    if xls_file.sheets[sheet].rows.count <= row
      logger.error " [ERROR] ROW NUMBER IS HIGHER THAN THE ROWS IN FILE!"
      return false
    end
    true
  end

  # @param sheet [Integer] sheet index
  # @return [Boolean] whether the index is an Integer below the sheet count
  def sheet_exists?(sheet)
    logger.debug "[+] CHECKING IF THE SHEET EXISTS"
    unless sheet.is_a?(Integer)
      logger.error " [ERROR] SHEET NUMBER IS NOT AN INTEGER!"
      return false
    end
    if xls_file.sheets.count <= sheet
      logger.error " [ERROR] SHEET NUMBER IS HIGHER THAN THE SHEET COUNT!"
      return false
    end
    logger.debug "[-] SHEET IS VALID"
    true
  end

  # Validates one sheet against a validator definition.
  #
  # SYNTAX:
  #   {
  #     :field_content => [
  #       { :row => 0, :column => 0, :content => "TEXT_IN_FIELD" }
  #     ]
  #   }
  #
  # Only the +:field_content+ validator is implemented; any other key makes
  # the validation fail.
  #
  # @param validator_hash [Hash] validator definition
  # @param sid [Integer] index of the sheet to validate
  # @return [Boolean] whether every listed cell holds the expected content
  def validate_content(validator_hash, sid)
    logger.debug "[+] VALIDATE CONTENT OF EXCEL_SHEET WITH ID #{sid} AND VALIDATOR_HASH #{validator_hash}"
    return false unless file_has_sheets?
    return false unless sheet_exists?(sid)

    # Iterate key/value pairs; iterating only the keys would leave the
    # value nil and reject every validator.
    validator_hash.each do |key, value|
      unless key == :field_content
        logger.error " [ERROR] validator_hash: #{key} IS NOT IMPLEMENTED"
        return false
      end

      logger.debug " [+] VALIDATE THE CONTENT OF SOME FIELDS"
      unless value.is_a?(Array)
        logger.error " [ERROR] validator_hash[:field_content] IS NOT AN ARRAY: validator_hash(#{validator_hash})"
        return false
      end

      sheet = xls_file.sheets[sid]
      value.each do |field|
        logger.debug " [+] VALIDATING FIELD #{sheet.name}[#{field[:row]}][#{field[:column]}]"
        return false unless field_exists?(sid, field[:row], field[:column])

        actual = sheet.rows[field[:row]][field[:column]]
        unless actual == field[:content]
          logger.error " [ERROR] FIELD IS NOT VALID! SHEET: #{sheet.name}[#{field[:row]}][#{field[:column]}] FIELD-CONTENT: #{actual} VALIDATOR: #{field}"
          return false
        end
        logger.debug " [OK] FIELD IS VALID!"
        logger.debug " [-] FIELD IS VALID"
      end
      logger.debug " [-] FIELDS ARE VALID"
    end
    logger.debug "[-] EXCEL_SHEET IS VALID"
    true
  end

  # Checks that the file exists and that the +file+ utility reports one of
  # {READABLE_MIME_TYPES} for it.
  #
  # @param file_path [String] path of the file to inspect
  # @return [Boolean] whether the file exists and has an accepted MIME type
  def validate_file_type(file_path)
    logger.debug "[+] VALIDATING FILE TYPE OF #{file_path}"
    unless File.exist?(file_path)
      logger.error " [ERROR] THE FILE DOES NOT EXISTS: function(validate_file_type) file_path(#{file_path})"
      return false
    end
    logger.debug " [OK] THE FILE EXISTS"

    # Argument-array form runs `file` without a shell, so characters in the
    # path cannot be interpreted as shell syntax; `--` ends option parsing.
    mime_type = IO.popen(["file", "--brief", "--mime-type", "--", file_path], &:read).strip
    unless READABLE_MIME_TYPES.include?(mime_type)
      logger.error " [ERROR] MIME TYPE IS NOT COMPATIBLE: function(validate_file_type) file_path(#{file_path})"
      return false
    end
    logger.debug " [OK] MIME TYPE IS #{mime_type} READABLE"
    logger.debug "[-] FILE TYPE IS VALID"
    true
  end

  # Loads the workbook with SimpleXlsxReader and stores it in +xls_file+.
  #
  # @param file_path [String] path of the workbook
  # @return [Boolean] whether a SimpleXlsxReader::Document was obtained
  def read_file(file_path)
    logger.debug "[+] TRYING TO READ FILE #{file_path}"
    self.xls_file = SimpleXlsxReader.open(file_path)
    unless xls_file.kind_of?(SimpleXlsxReader::Document)
      logger.error " [ERROR] FILE IS NOT READABLE WITH The SimpleXlsxReader: function(read_file) file_path(#{file_path})"
      return false
    end
    logger.debug " [OK] FILE IS READABLE WITH THE SimpleXlsxReader"
    logger.debug "[-] END TRYING TO READ FILE"
    true
  end

  private

  # Whether sheet +sid+ carries the identification cell described by
  # +identification+ (+:row+, +:column+, +:content+).
  def identified_sheet?(sid, identification)
    row = identification[:row]
    column = identification[:column]
    return false unless field_exists?(sid, row, column)

    xls_file.sheets[sid].rows[row][column] == identification[:content]
  end
end
|
metadata
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: universal_excel_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- cdt
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-08-10 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: simple_xlsx_reader
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.0'
|
27
|
+
description:
|
28
|
+
email: codingdt@gmx.de
|
29
|
+
executables: []
|
30
|
+
extensions: []
|
31
|
+
extra_rdoc_files: []
|
32
|
+
files:
|
33
|
+
- lib/universal_excel_parser.rb
|
34
|
+
homepage: http://rubygems.org/gems/universal_excel_parser
|
35
|
+
licenses: []
|
36
|
+
metadata: {}
|
37
|
+
post_install_message:
|
38
|
+
rdoc_options: []
|
39
|
+
require_paths:
|
40
|
+
- lib
|
41
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
51
|
+
requirements: []
|
52
|
+
rubyforge_project:
|
53
|
+
rubygems_version: 2.5.2
|
54
|
+
signing_key:
|
55
|
+
specification_version: 4
|
56
|
+
summary: Hash based excel parser
|
57
|
+
test_files: []
|