universal_excel_parser 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/universal_excel_parser.rb +221 -0
  3. metadata +57 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a042c3feaaebe5d532e85ccb7b6cfd9ff6d787dc
4
+ data.tar.gz: e46cb8657a18bc2e93581e8e2d48565685fa62aa
5
+ SHA512:
6
+ metadata.gz: 2b99cff666a7d8ba425f3e018d58048b82df2bd533be7c8af70d7585b92cd095c45c2f8d2ce2bf13d938e428500d041643b6412e3a6defadf3516c698f5f6258
7
+ data.tar.gz: 5c3c75374cec78681f9a9cb0efd0b57d1b2158143952f356d3ee27afd72023ec27cfdd310acd02dc0712c1763ab5ecd6537be980c74a0be74f397572c9fca685
@@ -0,0 +1,221 @@
1
+ require 'simple_xlsx_reader'
2
+ require 'logger'
3
# Hash-driven reader for .xlsx workbooks.
#
# A workbook is opened with SimpleXlsxReader and each sheet is matched against
# a caller-supplied description. A description names one "identification"
# cell whose content must equal a given value, plus the cells to extract:
#
#   {
#     :CUSTOM_NAME => {
#       :sheet_identification => { :row => 0, :column => 0, :content => "TEXT" },
#       :data => { :KEY => [ROW, COLUMN] }
#     }
#   }
#
# Sheet, row and column numbers are zero-based indices into
# +xls_file.sheets+, +sheet.rows+ and the row array respectively.
# All validation methods log their reason and return +false+ on failure
# instead of raising.
class UniversalExcelParser
  # MIME types reported by file(1) that are accepted as readable workbooks.
  XLSX_MIME_TYPES = [
    'application/zip',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
  ].freeze

  attr_accessor :xls_file, :file_path, :data_hash, :validator_hashes, :logger

  # @param p_file_path [String] path of the workbook
  # @param p_data_hash [Hash] sheet descriptions (see class comment)
  # @param p_validator_hashes [Hash] validators; only stored, not used by this class
  # @param p_logger_level [Integer] Logger severity for the STDOUT logger
  def initialize(p_file_path, p_data_hash, p_validator_hashes, p_logger_level = Logger::ERROR)
    self.file_path = p_file_path
    self.data_hash = p_data_hash
    self.validator_hashes = p_validator_hashes
    self.logger = Logger.new(STDOUT)
    logger.level = p_logger_level
  end

  # Convenience entry point: checks the file type, opens the file, verifies
  # that every sheet is covered by +data_hash+ and then extracts the data.
  #
  # @return [Hash, false] extracted data keyed like +data_hash+, or +false+
  #   as soon as one of the steps fails
  def self.parse_hash_with_sheet_identification(file_path, data_hash, validator_hashes = {})
    uep = new(file_path, data_hash, validator_hashes)
    uep.logger.info "READING FILE #{file_path}"
    return false unless uep.validate_file_type(uep.file_path)
    return false unless uep.read_file(uep.file_path)
    return false unless uep.validate_readability_of_all_sheets_with_identification(data_hash)

    data = uep.read_hash_with_sheet_identification(data_hash)
    uep.logger.info "READING FILE #{file_path}: FINISHED"
    data
  end

  # Extracts the described cells from every sheet whose identification cell
  # matches. Each sheet is consumed by the first description that matches it.
  #
  # A described cell that does not exist is logged and stored as +nil+.
  #
  # @param input [Hash] sheet descriptions (see class comment)
  # @return [Hash{Object => Array<Hash>}, false] one record per matched sheet,
  #   grouped under the description key; +false+ when +input+ is not a Hash
  def read_hash_with_sheet_identification(input)
    logger.debug "[+] READING THE FILES WITH SHEET IDENTIFICATION"
    unless input.is_a?(Hash)
      logger.error " [ERROR] INPUT IS NOT AN HASH"
      return false
    end

    output_data = {}
    sheet_ids = (0...xls_file.sheets.count).to_a
    input.each do |key, value|
      # delete_if removes a sheet id once a description has claimed it.
      sheet_ids.delete_if do |sid|
        next false unless sheet_matches?(sid, value[:sheet_identification])

        (output_data[key] ||= []) << read_sheet_fields(sid, value[:data])
        true
      end
    end
    logger.debug " [ERROR] SHEETS THAT NOT READ: #{sheet_ids}"
    output_data
  end

  # Dry run of #read_hash_with_sheet_identification: checks that every sheet
  # is matched by some description and that all described cells exist.
  #
  # @param input [Hash] sheet descriptions (see class comment)
  # @return [Boolean] +false+ when +input+ is not a Hash, a described cell is
  #   missing, or at least one sheet is matched by no description
  def validate_readability_of_all_sheets_with_identification(input)
    logger.debug "[+] TRYING TO READ ALL SHEETS"
    unless input.is_a?(Hash)
      logger.error " [ERROR] INPUT IS NOT AN HASH"
      return false
    end

    sheet_ids = (0...xls_file.sheets.count).to_a
    input.each do |_key, value|
      sheet_ids.delete_if do |sid|
        next false unless sheet_matches?(sid, value[:sheet_identification])

        value[:data].each do |_data_key, position|
          row, column = position
          return false unless field_exists?(sid, row, column)
        end
        true
      end
    end

    unless sheet_ids.empty?
      logger.error "I CANT READ ALL SHEETS. NOT READABLE SHEETS:"
      sheet_ids.each { |sid| logger.error " - #{xls_file.sheets[sid].name}" }
      return false
    end
    true
  end

  # @return [Boolean] whether the opened workbook contains at least one sheet
  def file_has_sheets?
    sheet_count = xls_file.sheets.count
    if sheet_count.zero?
      logger.error " [ERROR] FILE HAS NO SHEETS"
      false
    else
      logger.debug " [+] FILE HAS #{sheet_count} SHEETS"
      true
    end
  end

  # @param count [Integer] expected number of sheets
  # @return [Boolean] whether the workbook has exactly +count+ sheets
  def validate_sheet_count(count)
    actual = xls_file.sheets.count
    if actual == count
      logger.debug " [+] THE FILE HAS THE CORRECT COUNT OF SHEETS(#{count}/#{actual})"
      true
    else
      logger.error " [ERROR] THE FILE HAS A WRONG COUNT OF SHEETS(#{count}/#{actual})"
      false
    end
  end

  # @param sheet [Integer] sheet index
  # @param row [Integer] row index
  # @param column [Integer] column index
  # @return [Boolean] whether the cell lies inside the sheet's data
  def field_exists?(sheet, row, column)
    return false unless row_exists?(sheet, row)

    unless column.is_a?(Integer)
      logger.error " [ERROR] COLUMN NUMBER IS NOT AN INTEGER!"
      return false
    end
    if xls_file.sheets[sheet].rows[row].count <= column
      logger.error " [ERROR] COLUMN NUMBER IS HIGHER THAN THE ROWS IN THE FILE!"
      return false
    end
    true
  end

  # @param sheet [Integer] sheet index
  # @param row [Integer] row index
  # @return [Boolean] whether the sheet exists and has a row at that index
  def row_exists?(sheet, row)
    return false unless sheet_exists?(sheet)

    unless row.is_a?(Integer)
      logger.error " [ERROR] ROW IS NOT AN INTEGER!"
      return false
    end
    if xls_file.sheets[sheet].rows.count <= row
      logger.error " [ERROR] ROW NUMBER IS HIGHER THAN THE ROWS IN FILE!"
      return false
    end
    true
  end

  # @param sheet [Integer] sheet index
  # @return [Boolean] whether the workbook has a sheet at that index
  def sheet_exists?(sheet)
    logger.debug "[+] CHECKING IF THE SHEET EXISTS"
    unless sheet.is_a?(Integer)
      logger.error " [ERROR] SHEET NUMBER IS NOT AN INTEGER!"
      return false
    end
    if xls_file.sheets.count <= sheet
      logger.error " [ERROR] SHEET NUMBER IS HIGHER THAN THE SHEET COUNT!"
      return false
    end
    logger.debug "[-] SHEET IS VALID"
    true
  end

  # Checks one sheet against a validator hash. The only implemented validator
  # is +:field_content+:
  #
  #   { :field_content => [{ :row => 0, :column => 0, :content => "TEXT_IN_FIELD" }] }
  #
  # @param validator_hash [Hash] validators keyed by validator name
  # @param sid [Integer] sheet index
  # @return [Boolean] +false+ on the first failing or unknown validator
  def validate_content(validator_hash, sid)
    logger.debug "[+] VALIDATE CONTENT OF EXCEL_SHEET WITH ID #{sid} AND VALIDATOR_HASH #{validator_hash}"
    return false unless file_has_sheets?
    return false unless sheet_exists?(sid)

    validator_hash.each do |key, value|
      unless key == :field_content
        logger.error " [ERROR] validator_hash: #{key} IS NOT IMPLEMENTED"
        return false
      end

      logger.debug " [+] VALIDATE THE CONTENT OF SOME FIELDS"
      unless value.is_a?(Array)
        logger.error " [ERROR] validator_hash[:field_content] IS NOT AN ARRAY: validator_hash(#{validator_hash})"
        return false
      end
      return false unless value.all? { |expected| field_content_valid?(sid, expected) }

      logger.debug " [-] FIELDS ARE VALID"
    end
    logger.debug "[-] EXCEL_SHEET IS VALID"
    true
  end

  # Checks that the file exists and that file(1) reports a zip/xlsx MIME type.
  #
  # @param file_path [String] path of the file to check
  # @return [Boolean]
  def validate_file_type(file_path)
    logger.debug "[+] VALIDATING FILE TYPE OF #{file_path}"
    unless File.exist?(file_path)
      logger.error " [ERROR] THE FILE DOES NOT EXISTS: function(validate_file_type) file_path(#{file_path})"
      return false
    end
    logger.debug " [OK] THE FILE EXISTS"

    # Argument-array form bypasses the shell, so characters in the path cannot
    # be interpreted as shell syntax; "--" keeps a leading "-" from being
    # parsed as an option.
    mime_type = IO.popen(['file', '--brief', '--mime-type', '--', file_path.to_s], &:read).strip
    unless XLSX_MIME_TYPES.include?(mime_type)
      logger.error " [ERROR] MIME TYPE IS NOT COMPATIBLE: function(validate_file_type) file_path(#{file_path})"
      return false
    end
    logger.debug " [OK] MIME TYPE IS #{mime_type} READABLE"
    logger.debug "[-] FILE TYPE IS VALID"
    true
  end

  # Opens the workbook with SimpleXlsxReader and stores it in +xls_file+.
  #
  # @param file_path [String] path of the workbook
  # @return [Boolean] whether a SimpleXlsxReader::Document was obtained
  def read_file(file_path)
    logger.debug "[+] TRYING TO READ FILE #{file_path}"
    self.xls_file = SimpleXlsxReader.open(file_path)
    unless xls_file.kind_of?(SimpleXlsxReader::Document)
      logger.error " [ERROR] FILE IS NOT READABLE WITH The SimpleXlsxReader: function(read_file) file_path(#{file_path})"
      return false
    end
    logger.debug " [OK] FILE IS READABLE WITH THE SimpleXlsxReader"
    logger.debug "[-] END TRYING TO READ FILE"
    true
  end

  private

  # Whether the identification cell of sheet +sid+ exists and holds the
  # expected content.
  def sheet_matches?(sid, identification)
    row = identification[:row]
    column = identification[:column]
    return false unless field_exists?(sid, row, column)

    xls_file.sheets[sid].rows[row][column] == identification[:content]
  end

  # Builds one record for sheet +sid+ from +fields+ (key => [row, column]).
  # Cells that do not exist are logged and recorded as nil.
  def read_sheet_fields(sid, fields)
    fields.each_with_object({}) do |(data_key, position), record|
      row, column = position
      if field_exists?(sid, row, column)
        record[data_key] = xls_file.sheets[sid].rows[row][column]
      else
        logger.error "FIELD DOES NOT EXISTS [#{sid}][#{row}][#{column}]"
        record[data_key] = nil
      end
    end
  end

  # Checks a single :field_content entry against sheet +sid+.
  def field_content_valid?(sid, expected)
    sheet = xls_file.sheets[sid]
    row = expected[:row]
    column = expected[:column]
    logger.debug " [+] VALIDATING FIELD #{sheet.name}[#{row}][#{column}]"
    return false unless field_exists?(sid, row, column)

    actual = sheet.rows[row][column]
    unless actual == expected[:content]
      logger.error " [ERROR] FIELD IS NOT VALID! SHEET: #{sheet.name}[#{row}][#{column}] FIELD-CONTENT: #{actual} VALIDATOR: #{expected}"
      return false
    end
    logger.debug " [OK] FIELD IS VALID!"
    logger.debug " [-] FIELD IS VALID"
    true
  end
end
metadata ADDED
@@ -0,0 +1,57 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: universal_excel_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.1.0
5
+ platform: ruby
6
+ authors:
7
+ - cdt
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-08-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: simple_xlsx_reader
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ description:
28
+ email: codingdt@gmx.de
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - lib/universal_excel_parser.rb
34
+ homepage: http://rubygems.org/gems/universal_excel_parser
35
+ licenses: []
36
+ metadata: {}
37
+ post_install_message:
38
+ rdoc_options: []
39
+ require_paths:
40
+ - lib
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ requirements: []
52
+ rubyforge_project:
53
+ rubygems_version: 2.5.2
54
+ signing_key:
55
+ specification_version: 4
56
+ summary: Hash based excel parser
57
+ test_files: []