excelx_preview 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ *.xlsx*
2
+ *.xls*
3
+ .bundle/
4
+ .idea/
5
+ *.lock
6
+ vendor/cache/
data/Gemfile ADDED
@@ -0,0 +1,16 @@
1
# Development dependencies for the gem; runtime dependencies come from the
# gemspec (see the `gemspec` line at the bottom).
#
# The :gemcutter and :rubygems source symbols are deprecated aliases of the
# public RubyGems index, so a single explicit HTTPS source replaces both.
source "https://rubygems.org"

group :development do
  gem "rspec"
  gem "rake"
  gem "guard"
  gem "guard-rspec"
  gem "simplecov"
  gem "flog"
  gem "yard"
  gem "ci_reporter"
  gem "simplecov-rcov"
  gem "rdiscount"
end

gemspec
data/README.md ADDED
@@ -0,0 +1,20 @@
1
+ ## Description
2
+
3
+ Gem that extracts the first 10 rows of each sheet of an Excel (.xlsx) file.
4
+
5
+ ## Install
6
+
7
+ Install rvm and ruby-1.9.2
8
+
9
+
10
+ ## Then do:
11
+
12
+ $ gem install bundler
13
+ $ bundle install --path vendor/bundle
14
+
15
+ ## Usage
16
+
17
+ require 'excelx_preview'
18
+ ExcelX::Previewer.preview('/path/to/file.xlsx')
19
+
20
+
data/examples/Gemfile ADDED
@@ -0,0 +1,2 @@
1
# Gemfile for the example script: pulls the gem straight from its repository.
source "https://rubygems.org"
# The repository URL must be a quoted string (the bare git://... form does not
# parse as Ruby); HTTPS is used because GitHub no longer serves git://.
gem "excelx_preview", :git => "https://github.com/kp666/excelx_preview.git"
@@ -0,0 +1,5 @@
1
# Example: print the preview of the sheet named "Sheet1" of the workbook given
# on the command line.
#
#   ruby spreadsheet.rb /path/to/file.xlsx
require 'rubygems'
require 'bundler/setup'
require 'excelx_preview'
require 'pp'

abort "usage: ruby spreadsheet.rb FILE.xlsx" if ARGV.empty?

# The previewer keys its result by the lower-cased sheet name.
data = ExcelX::Previewer.preview(ARGV[0])["sheet1"]
pp data
5
+
@@ -0,0 +1,22 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for excelx_preview. The version number is read from
# lib/excelx_preview/version.rb, so lib/ is put on the load path first.
$:.push File.expand_path("../lib", __FILE__)
require "excelx_preview/version"

Gem::Specification.new do |s|
  s.name        = "excelx_preview"
  s.version     = ExcelxPreview::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Krishnaprasad T Nair"]
  s.email       = ["kpisgod@gmail.com"]
  # An empty homepage makes `gem build` warn; point it at the repository that
  # the example Gemfile installs the gem from.
  s.homepage    = "https://github.com/kp666/excelx_preview"
  s.summary     = %q{Gem that extracts first 10 rows from an excelx file}
  s.description = %q{Gem that extracts first 10 rows from an excelx file.Supports simple formulas.}

  s.add_dependency "nokogiri"
  s.add_dependency 'activesupport'
  s.add_dependency 'uuid'
  # Package exactly the files tracked by git.
  s.files         = `git ls-files`.split("\n")
  # s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  # s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
@@ -0,0 +1,10 @@
1
# standard libraries
require 'date'
require 'fileutils'
require 'pp'
require 'time'
# other gems
require 'nokogiri'
require 'uuid'


require_relative "excelx_preview/excelx"
@@ -0,0 +1,316 @@
1
# Error type declared alongside the previewer. Nothing in the visible source
# raises or rescues it.
class NoRowError < StandardError; end
3
+
4
+ module ExcelX
5
+ module Previewer
6
+
7
# Result accumulator: sheet name => { "Row<n>" => { cell reference => value } }.
@preview = {}

# Format codes for the numFmtId values that have no <numFmt> entry of their
# own in styles.xml (used as the fallback in attribute2format).
# Frozen: these are lookup tables and must not be mutated at runtime.
STANDARD_FORMATS = {
  0 => 'General',
  1 => '0',
  2 => '0.00',
  3 => '#,##0',
  4 => '#,##0.00',
  9 => '0%',
  10 => '0.00%',
  11 => '0.00E+00',
  12 => '# ?/?',
  13 => '# ??/??',
  14 => 'mm-dd-yy',
  15 => 'd-mmm-yy',
  16 => 'd-mmm',
  17 => 'mmm-yy',
  18 => 'h:mm AM/PM',
  19 => 'h:mm:ss AM/PM',
  20 => 'h:mm',
  21 => 'h:mm:ss',
  22 => 'm/d/yy h:mm',
  37 => '#,##0 ;(#,##0)',
  38 => '#,##0 ;[Red](#,##0)',
  39 => '#,##0.00;(#,##0.00)',
  40 => '#,##0.00;[Red](#,##0.00)',
  45 => 'mm:ss',
  46 => '[h]:mm:ss',
  47 => 'mmss.0',
  48 => '##0.0E+0',
  49 => '@',
}.freeze

# Format code => value type (:float, :percentage, :date, :time or :datetime),
# consulted by format2type.
FORMATS = {
  "general" => :float,
  "0" => :float,
  "0.00" => :float,
  "#,##0" => :float,
  "#,##0.00" => :float,
  "0%" => :percentage,
  "0.00%" => :percentage,
  "0.00E+00" => :float,
  "# ?/?" => :float,
  "# ??/??" => :float,
  "mm-dd-yy" => :date,
  "d-mmm-yy" => :date,
  "d-mmm" => :date,
  "mmm-yy" => :date,
  "h:mm AM/PM" => :time,
  "h:mm:ss AM/PM" => :time,
  "hh:mm:ss AM/PM" => :time,
  "h:mm" => :time,
  "h:mm:ss" => :time,
  "m/d/yy h:mm" => :datetime,
  "#,##0 ;(#,##0)" => :float,
  "#,##0 ;[Red](#,##0)" => :float,
  "#,##0.00;(#,##0.00)" => :float,
  "#,##0.00;[Red](#,##0.00)" => :float,
  "mm:ss" => :time,
  "[h]:mm:ss" => :time,
  "mmss.0" => :time,
  "##0.0E+0" => :float,
  "@" => :float,
  "yyyy\\-mm\\-dd" => :date,
  "dd/mm/yy" => :date,
  "hh:mm:ss" => :time,
  "dd/mm/yy hh:mm" => :datetime,
  "dd/mmm/yy" => :date,
  "yyyy-mm-dd" => :date,
  "hh:mm:ss am/pm" => :time,
  "mm/dd/yy hh:mm am/pm" => :datetime,
  "mm/dd/yy" => :date,
  "h:mm am/pm" => :time,
  "m/d/yyyy" => :date,
  "m/d/yyyy h:mm" => :datetime,
  "hh:mm am/pm" => :time,
  "dd/mm/yyyy" => :date,
}.freeze

# Format code => strptime/strftime pattern used by datetimeformat for
# "date(...)" / "time(...)" cell values. Add more formats when found.
DATE_TIME_FORMAT = {
  "mm/dd/yy" => "%y,%m,%d", # coz excelx stores in this format
  "m/d/yyyy" => "%Y,%m,%d",
  "h:mm am/pm" => "%I:%M:%S %p"
}.freeze
88
+
89
# Loads the number-format tables from xl/styles.xml of the extracted workbook.
#
# Fills two module-level lists:
#   @numFmts - one [numFmtId, formatCode] pair (strings) per <numFmt> element
#   @cellXfs - one [numFmtId] array per <xf> element under <cellXfs>, in
#              document order; attribute2format indexes it with a cell's "s"
#              attribute.
#
# Raises whatever File.open raises when styles.xml is missing.
def self.styles # taken from roo gem
  @numFmts = []
  @cellXfs = []
  # Block form so the file handle is closed as soon as parsing is done.
  style = File.open("#{@tmp_folder}/xl/styles.xml") { |io| Nokogiri::XML(io) }
  style.xpath("//*[local-name()='numFmt']").each do |num_fmt|
    @numFmts << [num_fmt['numFmtId'], num_fmt['formatCode']]
  end
  style.xpath("//*[local-name()='cellXfs']").each do |xfs|
    # Element children only: whitespace text nodes in a pretty-printed file
    # would otherwise take up slots and shift every style index.
    xfs.element_children.each { |xf| @cellXfs << [xf['numFmtId']] }
  end
end
105
+
106
# Resolves a cell's style index to its number-format code.
#
# s - the style index (the cell's "s" attribute); converted with to_i.
#
# Looks up the numFmtId stored at @cellXfs[s], then returns the matching
# custom format code from @numFmts or, failing that, the built-in code from
# STANDARD_FORMATS. Returns nil when the style index is out of range or the
# format id is unknown.
def self.attribute2format(s) # taken from roo gem
  xf = @cellXfs[s.to_i]
  # A style index with no <xf> entry used to raise NoMethodError on nil.
  return nil unless xf

  num_fmt_id = xf.first
  custom = @numFmts.find { |id, _code| id.to_s == num_fmt_id }
  code = custom && custom[1]
  code || STANDARD_FORMATS[num_fmt_id.to_i]
end
123
+
124
# Maps a number-format code to a value type.
#
# format - a format code; get_content passes it lower-cased, with backslashes
#          removed and "-" replaced by "/".
#
# Returns the type registered in FORMATS (:float, :percentage, :date, :time or
# :datetime), or :float when the code is unknown.
#
# Both the argument and the FORMATS keys are compared in that same normalised
# form. Previously keys such as "d-mmm-yy" or "h:mm:ss AM/PM" could never
# match the normalised code and were silently treated as :float.
def self.format2type(format) # taken from roo gem
  normalise = lambda { |code| code.to_s.downcase.gsub(/\\/, "").gsub("-", "/") }
  # Built once; the first entry wins when two keys normalise identically.
  @format_types ||= FORMATS.each_with_object({}) do |(code, type), index|
    index[normalise.call(code)] ||= type
  end
  @format_types.fetch(normalise.call(format), :float)
end
131
+
132
# Picks the strptime/strftime pattern for a format code.
#
# format - format code looked up in DATE_TIME_FORMAT
# type   - value type used for the fallback when the code is not listed
#
# Returns the registered pattern, else "%Y,%m,%d" for :date, "%I:%M:%S %p"
# for :time and "" for anything else.
def self.datetimeformat(format, type)
  DATE_TIME_FORMAT.fetch(format) do
    case type
    when :date then "%Y,%m,%d"
    when :time then "%I:%M:%S %p"
    else ""
    end
  end
end
145
+
146
# Formats a serial date/time number as text.
#
# value - the raw cell value (String or number); converted with to_f
# type  - :time, :datetime, :date, or any other type
#
# The serial number is turned into seconds since the Unix epoch by treating
# serial 25569 as 1970-01-01 and one unit as one day, then formatted in UTC:
#   :time     => "%I:%M:%S %p"
#   :datetime => "%m/%d/%Y %I:%M:%S %p"
#   :date     => "%m/%d/%Y"
#
# Every other type (:float, :percentage, ...) returns value unchanged;
# previously anything but :float came back as nil. If the conversion fails,
# value is returned as is.
def self.datetime(value, type)
  pattern = case type
            when :time then "%I:%M:%S %p"
            when :datetime then "%m/%d/%Y %I:%M:%S %p"
            when :date then "%m/%d/%Y"
            end
  return value unless pattern

  # Round to whole seconds: the float product can land a hair below the
  # intended second, and strftime truncates rather than rounds.
  seconds = ((value.to_f - 25569) * 86400.0).round
  Time.at(seconds).utc.strftime(pattern)
rescue StandardError
  value
end
158
+
159
# Converts the text of an inline ("is") cell holding a "date(y,m,d)" or
# "time(h,m,s)" expression into display text.
#
# value  - the cell text (get_content passes it lower-cased)
# type   - value type from format2type
# format - normalised format code, used to pick the pattern via datetimeformat
#
# Returns
#   :date - the ISO date string (Date#to_s) parsed from "date(...)"
#   :time - the time formatted with the pattern from datetimeformat
#   other - value unchanged (previously nil, which dropped plain inline text)
# and value itself whenever it cannot be parsed.
def self.datetime_whenis(value, type, format)
  pattern = datetimeformat(format, type)
  case type
  when :date
    begin
      Date.strptime(value, "date(#{pattern}").to_s # TODO do same formatting as in the sheet
    rescue ArgumentError, TypeError
      value
    end
  when :time
    # Take the argument list out of "time(...)" explicitly. String#delete
    # removes individual characters, not the prefix, and mangled other input.
    args = value.to_s[/\Atime\((.*)\)\z/im, 1]
    return value unless args

    hour, minute, second = args.split(",").map(&:to_i)
    begin
      # The date part is arbitrary; only the time of day is formatted.
      Time.utc(2000, 1, 1, hour.to_i, minute.to_i, second.to_i).strftime(pattern)
    rescue ArgumentError
      value
    end
  else
    value
  end
end
169
+
170
# Returns the text value of a cell node.
#
# link - a cell node exposing #children and #content; children expose #name
#        and #text.
#
# For a formula cell (one with an "f" child) the result is the text of its
# "v" child, i.e. the stored result rather than the formula, or nil when no
# result is stored. For every other cell it is link.content.
#
# The formula is detected anywhere among the children. Previously only the
# first child was checked, and a formula without a "v" value either leaked the
# formula text or raised on nil.
def self.content_from_link(link)
  kids = link.children
  return link.content unless kids.any? { |node| node.name == "f" }

  cached = kids.find { |node| node.name == "v" }
  cached ? cached.text : nil
end
177
+
178
# Tells whether the cell node has a child element named "is".
#
# c - a cell node exposing #children; each child exposes #name.
#
# Returns true or false.
def self.is?(c)
  c.children.any? { |child| child.name == "is" }
end
186
+
187
# Extracts the display value of one cell node.
#
# link - a cell node (a child of a <row>, as passed by first_10_from_sheet);
#        its "s" attribute is the style index and "t" the cell type.
# is   - unused; kept so existing callers keep working.
#
# Returns nil for a cell without children. Otherwise:
#   * inline ("is") cells  - date/time expressions go through datetime_whenis,
#                            any other text is returned as is;
#   * shared strings (t="s") - the entry from @shared_strings, whatever the
#                            style (styled strings used to yield their index);
#   * everything else      - the raw value, converted by datetime when the
#                            cell's number format is a date, time or datetime.
#
# The conversion is decided by the format's type alone, not by comparing the
# style index against the built-in format-id range.
def self.get_content(link, is=false)
  # Nothing to read; checked first so empty cells never touch the style tables.
  return nil if link.children.empty?

  s_value = link["s"].to_i
  format = attribute2format(s_value).to_s.downcase.gsub(/\\/, "").gsub("-", "/")
  type = format2type(format)

  if is?(link)
    return link.content unless [:date, :time].include?(type)
    return datetime_whenis(link.content.downcase, type, format)
  end

  return @shared_strings[link.content.to_i] if link['t'] == "s"

  raw = content_from_link(link)
  [:date, :time, :datetime].include?(type) ? datetime(raw, type) : raw
end
211
+
212
# Stores the first 10 rows of the current sheet in @preview.
#
# Reads rows from @doc and writes
#   @preview[@sheet_name]["Row<r>"] = { cell reference => get_content(cell) }
# where <r> is the row's "r" attribute. A sheet without rows gets an empty
# hash.
def self.first_10_from_sheet
  @preview[@sheet_name] = {}
  rows = @doc.xpath("//*[local-name()='row']")
  # first(10), not [0..10]: the inclusive range let an 11th row through.
  rows.first(10).each do |row|
    cells = {}
    # Element children only, so whitespace text nodes between cells do not
    # end up as a nil => nil entry.
    row.element_children.each { |c| cells[c['r']] = get_content(c) }
    @preview[@sheet_name]["Row#{row['r']}"] = cells
  end
end
224
+
225
# Builds a preview (first 10 rows per sheet) of an .xlsx workbook.
#
# filename - path to the workbook; its extension is dropped and ".xlsx" is
#            appended again by unzip.
# sheets   - false (default) to preview every sheet listed in workbook.xml, or
#            one sheet given as its XML part name ("sheet1"), as a
#            [part name, display name] pair, or as a Hash of such pairs.
#
# Returns { sheet name => { "Row<n>" => { cell reference => value } } }.
#
# Working state is kept in module-level instance variables, and the workbook
# is unpacked into a fresh directory under /tmp that is removed afterwards.
def self.preview(filename, sheets=false)
  # Start clean: results of an earlier call used to leak into this one.
  @preview = {}
  path = filename.to_s
  @filename = path.chomp(File.extname(path))
  @tmp_folder = "/tmp/#{UUID.new.generate}"
  begin
    unzip
    @sheet_list =
      if !sheets
        extracted_sheets
      elsif sheets.respond_to?(:each_pair)
        sheets
      else
        # A bare part name doubles as the display name, so the result is keyed
        # by that name instead of nil.
        xml_name, display_name = Array(sheets)
        [[xml_name, display_name || xml_name]]
      end
    shared_strings
    # styles.xml is workbook-wide: parse it once rather than once per sheet.
    styles unless @sheet_list.empty?
    @sheet_list.each do |sheet_name_in_xml, sheet_name_in_excelx|
      parse_xml(sheet_name_in_xml, sheet_name_in_excelx)
      fetch_headers
      first_10_from_sheet
    end
  ensure
    # Always remove the unpacked copy, even when parsing fails half-way.
    cleanup
  end
  @preview
end
242
+
243
# Unpacks "#{@filename}.xlsx" into @tmp_folder with the system `unzip` tool,
# overwriting existing files (-o).
#
# The command is given as an argument list, so no shell is involved and a
# file name containing spaces or shell metacharacters is passed through
# verbatim.
#
# Returns what unzip printed on standard output.
def self.unzip
  IO.popen(["unzip", "-o", "#{@filename}.xlsx", "-d", @tmp_folder]) { |io| io.read }
end
246
+
247
# Deletes the temporary extraction directory (@tmp_folder) and its contents.
#
# Does nothing when no folder has been set. A folder that no longer exists is
# not an error, because removal is forced.
def self.cleanup
  return if @tmp_folder.nil? || @tmp_folder.empty?

  FileUtils.remove_dir(@tmp_folder, true)
end
250
+
251
# Lists the workbook's sheets from xl/workbook.xml.
#
# Returns (and stores in @sheet_list) a Hash of
#   worksheet part name (e.g. "sheet1") => lower-cased sheet name.
#
# The part name is resolved through the sheet's relationship id and
# xl/_rels/workbook.xml.rels. sheetId is not guaranteed to match the file
# number, so "sheet<sheetId>" is only the fallback when the relationship
# cannot be resolved to a part under worksheets/.
def self.extracted_sheets
  # Block form closes the file handle right after parsing.
  workbook = File.open("#{@tmp_folder}/xl/workbook.xml") { |io| Nokogiri::XML(io) }
  parts = worksheet_parts_by_relationship
  @sheet_list = {}
  workbook.xpath("//*[local-name()='sheet']").each do |sheet|
    attrs = sheet.attributes
    # The namespaced r:id attribute is keyed by its local name "id".
    relationship = attrs["id"] && attrs["id"].value
    part_name = parts[relationship] || "sheet#{attrs["sheetId"].value}"
    @sheet_list[part_name] = attrs["name"].value.downcase
  end
  @sheet_list
end

# Internal helper: maps relationship ids to worksheet part names (file name
# without ".xml") from xl/_rels/workbook.xml.rels. Returns {} when that file
# is missing; targets outside worksheets/ are ignored.
def self.worksheet_parts_by_relationship
  rels_path = "#{@tmp_folder}/xl/_rels/workbook.xml.rels"
  return {} unless File.exist?(rels_path)

  rels = File.open(rels_path) { |io| Nokogiri::XML(io) }
  rels.xpath("//*[local-name()='Relationship']").each_with_object({}) do |rel, parts|
    match = rel['Target'].to_s.match(%r{(?:\A|/)worksheets/([^/]+)\.xml\z})
    parts[rel['Id']] = match[1] if match
  end
end
261
+
262
# Loads the shared-string table from xl/sharedStrings.xml.
#
# Returns (and stores in @shared_strings) an Array with the text content of
# each <si> element, in document order, so a cell's numeric reference can be
# used as an index. A workbook without a sharedStrings.xml yields [].
def self.shared_strings
  @shared_strings = []
  path = "#{@tmp_folder}/xl/sharedStrings.xml"
  # A missing file is the expected "no shared strings" case; test for it
  # explicitly instead of rescuing every error.
  return @shared_strings unless File.exist?(path)

  # Block form closes the file handle right after parsing.
  shared = File.open(path) { |io| Nokogiri::XML(io) }
  shared.xpath("//*[local-name()='si']").each { |si| @shared_strings << si.content }
  @shared_strings
end
277
+
278
# Parses the beginning of one worksheet part into @doc.
#
# sheet_name_in_xml    - part name without extension ("sheet1"); the file read
#                        is "#{@tmp_folder}/xl/worksheets/<name>.xml"
# sheet_name_in_excelx - name stored in @sheet_name for the result hash
#
# Only the text up to the end of the 10th </row> is handed to Nokogiri, so a
# large sheet is never read or parsed in full. The fragment is cut
# mid-document and its outer elements are left unclosed, as before.
def self.parse_xml(sheet_name_in_xml, sheet_name_in_excelx)
  @sheet_name = sheet_name_in_excelx
  sheet_xml_file = "#{@tmp_folder}/xl/worksheets/#{sheet_name_in_xml}.xml"
  @doc = Nokogiri.XML(leading_rows_xml(sheet_xml_file))
end

# Internal helper: returns the start of the file at path up to and including
# its limit-th "</row>" (or its last one when there are fewer), or "" when the
# file has no "</row>" at all. The file is read chunk_size bytes at a time and
# reading stops as soon as enough rows have been seen.
def self.leading_rows_xml(path, limit = 10, chunk_size = 1024)
  closer = "</row>"
  buffer = String.new
  seen = 0
  File.open(path, "rb") do |io|
    while seen < limit && (chunk = io.read(chunk_size))
      # Re-scan the last few bytes of what was already read, so a closing tag
      # split across two chunks is still counted - and counted only once,
      # since that overlap is shorter than the tag itself.
      window_start = [buffer.size - (closer.size - 1), 0].max
      buffer << chunk
      seen += buffer[window_start..-1].scan(closer).size
    end
  end

  # Cut right after the last wanted closing tag so the final row stays closed.
  cut = 0
  limit.times do
    hit = buffer.index(closer, cut)
    break unless hit
    cut = hit + closer.size
  end
  buffer[0, cut]
end
297
+
298
# Collects the values of the first row of the current sheet.
#
# Sets @row_count to the number of rows found in @doc and @headers to the
# first row's values; children with t="s" are resolved through
# @shared_strings, the others contribute their content.
#
# Returns @headers, or nil when the sheet has no rows (@headers is then []).
def self.fetch_headers
  @headers = []
  rows = @doc.xpath("//*[local-name()='row']")
  @row_count = rows.count
  return if @row_count < 1

  rows[0].children.each do |cell|
    text = cell.content
    @headers << (cell['t'] == "s" ? @shared_strings[text.to_i] : text)
  end
  @headers
end
311
+
312
+ end
313
+ end
314
+
315
+
316
+
@@ -0,0 +1,3 @@
1
# Release number of the gem; the gemspec reads it as ExcelxPreview::VERSION.
module ExcelxPreview #:nodoc:
  VERSION = "0.0.1"
end
@@ -0,0 +1,25 @@
1
require "rspec"

require_relative 'spec_helper'
require_relative '../lib/excelx_preview'

describe ExcelX do
  describe "#preview" do
    # Expected preview of the data/formulas.xlsx fixture: sheet name => row
    # label => cell reference => displayed value.
    expected_preview = {
      "sheet1" => {
        "Row1" => {
          "A1" => "01:25:30 PM",
          "B1" => "0.154251449887584",
          "C1" => "150",
          "D1" => "2012-01-28",
          "E1" => "12/25/2012 12:12:30 PM",
          "F1" => "12/03/2012",
          "G1" => "01:00:45 AM"
        },
        "Row2" => { "A2" => "1", "B2" => "2", "C2" => "3", "E2" => "4", "F2" => "5", "G2" => "6" }
      },
      "sheet2" => { "Row1" => { "A1" => nil, "D1" => nil, "E1" => nil, "F1" => nil, "G1" => nil } },
      "sheet3" => {}
    }

    it "it should preview first 10 lines of each sheets" do
      ExcelX::Previewer.preview("data/formulas.xlsx").should == expected_preview
    end
  end
end
@@ -0,0 +1,8 @@
1
require "simplecov"
require "simplecov-rcov"

# The rcov-style formatter is always selected; coverage is only collected when
# the COVERAGE environment variable is set.
SimpleCov.formatter = SimpleCov::Formatter::RcovFormatter

if ENV["COVERAGE"]
  SimpleCov.start do
    add_filter "vendor"
    add_filter "spec"
  end
end
metadata ADDED
@@ -0,0 +1,111 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: excelx_preview
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Krishnaprasad T Nair
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-11-26 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: activesupport
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: uuid
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ description: Gem that extracts first 10 rows from an excelx file.Supports simple formulas.
63
+ email:
64
+ - kpisgod@gmail.com
65
+ executables: []
66
+ extensions: []
67
+ extra_rdoc_files: []
68
+ files:
69
+ - .gitignore
70
+ - Gemfile
71
+ - README.md
72
+ - examples/Gemfile
73
+ - examples/spreadsheet.rb
74
+ - excelx_preview.gemspec
75
+ - lib/excelx_preview.rb
76
+ - lib/excelx_preview/excelx.rb
77
+ - lib/excelx_preview/version.rb
78
+ - spec/excelx_spec.rb
79
+ - spec/spec_helper.rb
80
+ homepage: ''
81
+ licenses: []
82
+ post_install_message:
83
+ rdoc_options: []
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ none: false
88
+ requirements:
89
+ - - ! '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ segments:
93
+ - 0
94
+ hash: 674781492701308248
95
+ required_rubygems_version: !ruby/object:Gem::Requirement
96
+ none: false
97
+ requirements:
98
+ - - ! '>='
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ segments:
102
+ - 0
103
+ hash: 674781492701308248
104
+ requirements: []
105
+ rubyforge_project:
106
+ rubygems_version: 1.8.21
107
+ signing_key:
108
+ specification_version: 3
109
+ summary: Gem that extracts first 10 rows from an excelx file
110
+ test_files: []
111
+ has_rdoc: