excelx_preview 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ *.xlsx*
2
+ *.xls*
3
+ .bundle/
4
+ .idea/
5
+ *.lock
6
+ vendor/cache/
data/Gemfile ADDED
@@ -0,0 +1,16 @@
1
# Development-time dependencies for excelx_preview.
# Runtime dependencies are declared in the gemspec and pulled in by `gemspec` below.

# A single HTTPS source replaces the deprecated :gemcutter / :rubygems symbols,
# which both pointed at the same index.
source "https://rubygems.org"

group :development do
  gem "rspec"
  gem "rake"
  gem "guard"
  gem "guard-rspec"
  gem "simplecov"
  gem "flog"
  gem "yard"
  gem "ci_reporter"
  gem "simplecov-rcov"
  gem "rdiscount"
end

gemspec
data/README.md ADDED
@@ -0,0 +1,20 @@
1
+ ## Description
2
+
3
+ Gem that extracts the first 10 rows of each sheet in an Excel (.xlsx) file.
4
+
5
+ ## Install
6
+
7
+ Install RVM and Ruby 1.9.2.
8
+
9
+
10
+ ## Then do:
11
+
12
+ $ gem install bundler
13
+ $ bundle install --path vendor/bundle
14
+
15
+ ## Usage
16
+
17
+ require 'excelx_preview'
18
+ ExcelX::Previewer.preview("/path/to/file.xlsx")
19
+
20
+
data/examples/Gemfile ADDED
@@ -0,0 +1,2 @@
1
# Gemfile for the example script: installs excelx_preview straight from its git repository.
source "https://rubygems.org"

# The repository URL must be a quoted string; HTTPS is used because GitHub no
# longer serves the unauthenticated git:// protocol.
gem "excelx_preview", :git => "https://github.com/kp666/excelx_preview.git"
@@ -0,0 +1,5 @@
1
# Example: print the preview of "sheet1" for the workbook given on the command line.
#
#   ruby spreadsheet.rb path/to/workbook.xlsx
require 'rubygems'
require 'bundler/setup'
require 'excelx_preview'

# Fail with a usage hint instead of an obscure error when no path is given.
abort "usage: ruby spreadsheet.rb path/to/workbook.xlsx" if ARGV.empty?

data = ExcelX::Previewer.preview(ARGV[0])["sheet1"]
# Show the extracted rows; previously the result was computed and thrown away.
pp data
5
+
@@ -0,0 +1,22 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for excelx_preview.
# The version number is read from lib/excelx_preview/version.rb.
$LOAD_PATH.push File.expand_path("../lib", __FILE__)
require "excelx_preview/version"

Gem::Specification.new do |spec|
  spec.name        = "excelx_preview"
  spec.version     = ExcelxPreview::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ["Krishnaprasad T Nair"]
  spec.email       = ["kpisgod@gmail.com"]
  spec.homepage    = ""
  spec.summary     = "Gem that extracts first 10 rows from an excelx file"
  spec.description = "Gem that extracts first 10 rows from an excelx file.Supports simple formulas."

  # Runtime dependencies, registered in the same order as before.
  %w[nokogiri activesupport uuid].each do |dependency|
    spec.add_dependency dependency
  end

  # Package every file tracked by git.
  spec.files = `git ls-files`.split("\n")
  # spec.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  # spec.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  spec.require_paths = ["lib"]
end
@@ -0,0 +1,10 @@
1
# standard libraries
require 'date'
require 'fileutils'
require 'pp'
require 'time'
# other gems
require 'nokogiri'
require 'uuid'
8
+
9
+
10
+ require_relative "excelx_preview/excelx"
@@ -0,0 +1,316 @@
1
# Custom error class for this library.
# NOTE(review): nothing visible in this file raises it; presumably reserved for
# sheets that contain no rows - confirm before relying on it.
NoRowError = Class.new(StandardError)
3
+
4
module ExcelX
  # Builds a preview (the first rows of every sheet) of an .xlsx workbook.
  #
  # The workbook is unpacked with the external `unzip` program into a temporary
  # folder and the relevant XML parts are read with Nokogiri. All intermediate
  # state is kept in module-level instance variables that are rebuilt by every
  # call to {preview}, so concurrent calls would share (and clobber) that state.
  module Previewer

    @preview = {}

    # Maximum number of rows returned per sheet.
    MAX_ROWS = 10
    # Closing tag used to recognise complete rows while streaming sheet XML.
    ROW_CLOSE = "</row>".freeze
    # Number of bytes read from a worksheet file per iteration.
    CHUNK_SIZE = 1024
    # Serial value that maps onto the Unix epoch (see {datetime}).
    UNIX_EPOCH_SERIAL = 25569
    SECONDS_PER_DAY = 86400.0

    # Built-in number formats, keyed by numFmtId.
    STANDARD_FORMATS = {
      0 => 'General',
      1 => '0',
      2 => '0.00',
      3 => '#,##0',
      4 => '#,##0.00',
      9 => '0%',
      10 => '0.00%',
      11 => '0.00E+00',
      12 => '# ?/?',
      13 => '# ??/??',
      14 => 'mm-dd-yy',
      15 => 'd-mmm-yy',
      16 => 'd-mmm',
      17 => 'mmm-yy',
      18 => 'h:mm AM/PM',
      19 => 'h:mm:ss AM/PM',
      20 => 'h:mm',
      21 => 'h:mm:ss',
      22 => 'm/d/yy h:mm',
      37 => '#,##0 ;(#,##0)',
      38 => '#,##0 ;[Red](#,##0)',
      39 => '#,##0.00;(#,##0.00)',
      40 => '#,##0.00;[Red](#,##0.00)',
      45 => 'mm:ss',
      46 => '[h]:mm:ss',
      47 => 'mmss.0',
      48 => '##0.0E+0',
      49 => '@',
    }.freeze

    # Format code => value type used when rendering a cell.
    FORMATS = {
      "general"=>:float,
      "0"=>:float,
      "0.00"=>:float,
      "#,##0"=>:float,
      "#,##0.00"=>:float,
      "0%"=>:percentage,
      "0.00%"=>:percentage,
      "0.00E+00"=>:float,
      "# ?/?"=>:float,
      "# ??/??"=>:float,
      "mm-dd-yy"=>:date,
      "d-mmm-yy"=>:date,
      "d-mmm"=>:date,
      "mmm-yy"=>:date,
      "h:mm AM/PM"=>:time,
      "h:mm:ss AM/PM"=>:time,
      "hh:mm:ss AM/PM"=>:time,
      "h:mm"=>:time,
      "h:mm:ss"=>:time,
      "m/d/yy h:mm"=>:datetime,
      "#,##0 ;(#,##0)"=>:float,
      "#,##0 ;[Red](#,##0)"=>:float,
      "#,##0.00;(#,##0.00)"=>:float,
      "#,##0.00;[Red](#,##0.00)"=>:float,
      "mm:ss"=>:time,
      "[h]:mm:ss"=>:time,
      "mmss.0"=>:time,
      "##0.0E+0"=>:float,
      "@"=>:float,
      "yyyy\\-mm\\-dd"=>:date,
      "dd/mm/yy"=>:date,
      "hh:mm:ss"=>:time,
      "dd/mm/yy hh:mm"=>:datetime,
      "dd/mmm/yy"=>:date,
      "yyyy-mm-dd"=>:date,
      "hh:mm:ss am/pm" => :time,
      "mm/dd/yy hh:mm am/pm" =>:datetime,
      "mm/dd/yy" => :date,
      "h:mm am/pm" => :time,
      "m/d/yyyy" => :date,
      "m/d/yyyy h:mm" => :datetime,
      "hh:mm am/pm" => :time,
      "dd/mm/yyyy" => :date,
    }.freeze

    DATE_TIME_FORMAT={#add more formats when found
      "mm/dd/yy" => "%y,%m,%d", #coz excelx stores in this format
      "m/d/yyyy" =>"%Y,%m,%d",
      "h:mm am/pm" => "%I:%M:%S %p"
    }.freeze

    # strftime patterns used by {datetime} for each convertible type.
    SERIAL_STRFTIME = {
      :time     => "%I:%M:%S %p",
      :datetime => "%m/%d/%Y %I:%M:%S %p",
      :date     => "%m/%d/%Y",
    }.freeze

    # Canonical form of a format code: lower case, backslashes removed and
    # hyphens turned into slashes. Both looked-up codes and the FORMATS keys go
    # through this, so e.g. 'd-mmm-yy' and 'h:mm:ss AM/PM' resolve correctly.
    def self.normalize_format(format)
      format.to_s.downcase.gsub(/\\/, "").gsub("-", "/")
    end
    private_class_method :normalize_format

    # FORMATS re-keyed by normalized format code.
    FORMAT_TYPES = FORMATS.each_with_object({}) do |(code, type), table|
      table[normalize_format(code)] = type
    end.freeze

    # Loads the custom number formats and the cell style table from
    # xl/styles.xml into @numFmts ([id, code] pairs) and @cellXfs ([numFmtId]
    # per style index). A missing styles part leaves both tables empty.
    def self.styles #taken from roo gem
      @numFmts = []
      @cellXfs = []
      path = "#{@tmp_folder}/xl/styles.xml"
      return unless File.exist?(path)
      # Block form closes the file handle once Nokogiri has parsed it.
      style = File.open(path) { |file| Nokogiri::XML(file) }
      style.xpath("//*[local-name()='numFmt']").each do |num_fmt|
        @numFmts << [num_fmt['numFmtId'], num_fmt['formatCode']]
      end
      style.xpath("//*[local-name()='cellXfs']").each do |xfs|
        # Only element children: whitespace text nodes would shift style indexes.
        xfs.element_children.each { |xf| @cellXfs << [xf['numFmtId']] }
      end
    end

    # Resolves a cell style index to its format code.
    #
    # @param s [#to_i] the cell's "s" attribute (index into @cellXfs)
    # @return [String, nil] custom or built-in format code; nil when the index
    #   or the format id is unknown
    def self.attribute2format(s) #taken from roo gem
      xf = @cellXfs[s.to_i]
      return nil unless xf
      fmt_id = xf.first
      custom = @numFmts.find { |id, _code| id.to_s == fmt_id }
      (custom && custom[1]) || STANDARD_FORMATS[fmt_id.to_i]
    end

    # Maps a format code to a value type.
    #
    # @param format [String] format code, raw or already normalized
    # @return [Symbol] :date, :time, :datetime, :percentage or :float (default)
    def self.format2type(format) #taken from roo gem
      FORMAT_TYPES.fetch(normalize_format(format), :float)
    end

    # Picks the strptime/strftime pattern for inline date/time cells.
    #
    # @param format [String] normalized format code
    # @param type [Symbol] value type from {format2type}
    # @return [String] pattern; "" when neither format nor type is known
    def self.datetimeformat(format, type)
      DATE_TIME_FORMAT.fetch(format) do
        case type
        when :date then "%Y,%m,%d"
        when :time then "%I:%M:%S %p"
        else ""
        end
      end
    end

    # Renders a serial date/time value as text.
    #
    # The serial is shifted by UNIX_EPOCH_SERIAL days and scaled to seconds,
    # i.e. serial 25569 is treated as 1970-01-01 00:00:00 UTC.
    # NOTE(review): this presumes the workbook's 1900 date system - confirm
    # whether 1904-based workbooks need support.
    #
    # @param value [String] raw cell value
    # @param type [Symbol] value type from {format2type}
    # @return [String] formatted text for :time/:datetime/:date; the untouched
    #   value for every other type, for blank values and on conversion errors
    def self.datetime(value, type)
      pattern = SERIAL_STRFTIME[type]
      return value if pattern.nil? || value.to_s.strip.empty?
      # Round to whole seconds: float error otherwise truncates e.g. 00:00:01
      # to 00:00:00.
      seconds = ((value.to_f - UNIX_EPOCH_SERIAL) * SECONDS_PER_DAY).round
      Time.at(seconds).utc.strftime(pattern)
    rescue StandardError
      value
    end

    # Converts the text of an inline-string cell ("date(y,m,d)" / "time(h,m,s)")
    # according to the cell's type.
    #
    # @param value [String] lower-cased cell text
    # @param type [Symbol] value type from {format2type}
    # @param format [String] normalized format code
    # @return [String] converted text, or value itself when the type is not
    #   :date/:time or the text cannot be parsed
    def self.datetime_whenis(value, type, format)
      date_or_time_format = datetimeformat(format, type)
      case type
      when :date
        Date.strptime(value, "date(#{date_or_time_format}").to_s # TODO do same formatting as in the sheet
      when :time
        arguments = value.to_s[/time\(([^)]*)\)/, 1]
        return value unless arguments
        Time.parse(arguments.split(",").map(&:to_i).join(":")).strftime(date_or_time_format)
      else
        value
      end
    rescue StandardError
      value
    end

    # Raw value of a cell node.
    #
    # @param link [Nokogiri::XML::Node] a cell node with at least one child
    # @return [String] for cells whose first child is <f>, the text of the last
    #   child (the cached result); otherwise the cell's full text content
    def self.content_from_link(link)
      nodes = link.children
      # `text` of the last child also copes with an empty <v/> element.
      nodes.first.name == "f" ? nodes.last.text : link.content
    end

    # @param c [Nokogiri::XML::Node] a cell node
    # @return [Boolean] true when the cell has an <is> (inline string) child
    def self.is?(c)
      c.children.any? { |child| child.name == "is" }
    end

    # True when the cell holds a plain number: its "t" attribute is absent or "n".
    def self.numeric_cell?(link)
      cell_type = link['t']
      cell_type.nil? || cell_type == "n"
    end
    private_class_method :numeric_cell?

    # Extracts the display value of a cell.
    #
    # @param link [Nokogiri::XML::Node] the cell node
    # @param is [Boolean] unused; kept so existing callers keep working
    # @return [String, nil] nil for cells without children
    def self.get_content(link, is=false)
      return nil if link.children.empty?
      format = normalize_format(attribute2format(link["s"].to_i))
      type = format2type(format)
      return datetime_whenis(link.content.downcase, type, format) if is?(link)
      # Shared strings are looked up regardless of the style index, so styled
      # text cells no longer return their string-table index.
      return @shared_strings[link.content.to_i] if link['t'] == "s"
      raw = content_from_link(link)
      # Only numeric cells can carry serial dates; other cells pass through.
      numeric_cell?(link) ? datetime(raw, type) : raw
    end

    # Stores up to MAX_ROWS rows of the currently parsed sheet in
    # @preview[@sheet_name] as { "Row<r>" => { cell reference => value } }.
    # Relies on @row_count set by {fetch_headers}.
    def self.first_10_from_sheet
      @preview[@sheet_name] = {}
      return if @row_count < 1
      @doc.xpath("//*[local-name()='row']").first(MAX_ROWS).each do |row|
        cells = {}
        row.element_children.each { |cell| cells[cell['r']] = get_content(cell) }
        @preview[@sheet_name]["Row#{row['r']}"] = cells
      end
    end

    # Previews an .xlsx workbook.
    #
    # @param filename [String, #to_s] path to the workbook; the extension is
    #   replaced by ".xlsx" before unpacking
    # @param sheets [false, Hash, Array, String] restricts the preview:
    #   a Hash of { xml name => display name }, a single [xml name, display name]
    #   pair, or a single xml name (e.g. "sheet1", which is then also used as
    #   the result key). When false, all sheets listed in the workbook are used.
    # @return [Hash] { sheet name => { "Row<r>" => { cell reference => value } } }
    def self.preview(filename, sheets=false)
      path = filename.to_s
      @filename = path.chomp(File.extname(path))
      @tmp_folder = "/tmp/#{UUID.new.generate}"
      # Start from a fresh result so earlier calls do not leak into this one.
      @preview = {}
      begin
        unzip
        @sheet_list = sheets ? requested_sheets(sheets) : extracted_sheets
        shared_strings
        styles # styles are per workbook, so load them once
        @sheet_list.each do |sheet_name_in_xml, sheet_name_in_excelx|
          parse_xml(sheet_name_in_xml, sheet_name_in_excelx)
          fetch_headers
          first_10_from_sheet
        end
      ensure
        # Remove the unpacked files even when parsing fails.
        cleanup
      end
      @preview
    end

    # Normalizes the `sheets` argument of {preview} into [xml name, display name] pairs.
    def self.requested_sheets(sheets)
      case sheets
      when Hash then sheets
      when Array then [sheets]
      else [[sheets.to_s, sheets.to_s]]
      end
    end
    private_class_method :requested_sheets

    # Unpacks "<@filename>.xlsx" into @tmp_folder with the external `unzip`
    # program. Arguments are passed as a list, so no shell parses the path.
    #
    # @return [Boolean, nil] result of Kernel#system
    def self.unzip
      system("unzip", "-o", "-qq", "#{@filename}.xlsx", "-d", @tmp_folder)
    end

    # Removes the temporary folder; a folder that was never created is ignored.
    def self.cleanup
      FileUtils.remove_dir(@tmp_folder, true) if @tmp_folder
    end

    # Reads the sheet list from xl/workbook.xml.
    #
    # NOTE(review): the worksheet file name is derived from sheetId
    # ("sheet<sheetId>"); workbooks whose parts are named differently would need
    # the workbook relationships part - confirm against real files.
    #
    # @return [Hash] { "sheet<sheetId>" => lower-cased sheet name }
    def self.extracted_sheets
      workbook = File.open("#{@tmp_folder}/xl/workbook.xml") { |file| Nokogiri::XML(file) }
      @sheet_list = {}
      workbook.xpath("//*[local-name()='sheet']").each do |sheet|
        @sheet_list["sheet#{sheet['sheetId']}"] = sheet['name'].downcase
      end
      @sheet_list
    end

    # Loads the shared string table from xl/sharedStrings.xml.
    #
    # @return [Array<String>] strings in table order; empty when the part is
    #   missing or cannot be read
    def self.shared_strings
      @shared_strings = []
      path = "#{@tmp_folder}/xl/sharedStrings.xml"
      return @shared_strings unless File.exist?(path)
      begin
        shared = File.open(path) { |file| Nokogiri::XML(file) }
      rescue StandardError
        # Best effort, as before: an unreadable table means "no shared strings".
        return @shared_strings
      end
      shared.xpath("//*[local-name()='si']").each do |string_item|
        @shared_strings << string_item.content
      end
      @shared_strings
    end

    # Reads from io until `limit` complete rows were seen (or the input ends)
    # and returns the XML up to and including the last complete row. Closing
    # tags that straddle two reads are detected as well.
    #
    # @param io [#read] source positioned at the start of the sheet XML
    # @param limit [Integer] maximum number of rows to keep
    # @param chunk_size [Integer] bytes requested per read
    # @return [String] truncated XML; "" when no complete row was found
    def self.leading_rows(io, limit = MAX_ROWS, chunk_size = CHUNK_SIZE)
      xml = String.new.force_encoding(Encoding::BINARY)
      rows_seen = 0
      cut = 0
      search_from = 0
      while rows_seen < limit && (chunk = io.read(chunk_size))
        xml << chunk
        while rows_seen < limit && (hit = xml.index(ROW_CLOSE, search_from))
          rows_seen += 1
          cut = search_from = hit + ROW_CLOSE.length
        end
        # Resume just early enough to catch a tag split across two chunks.
        search_from = [search_from, xml.length - ROW_CLOSE.length + 1].max
      end
      xml[0, cut]
    end
    private_class_method :leading_rows

    # Parses the leading rows of one worksheet into @doc and remembers the
    # sheet's display name in @sheet_name. The XML is cut after the last kept
    # row, so the still-open parent elements are left to Nokogiri's parser.
    #
    # @param sheet_name_in_xml [String] worksheet file name without ".xml"
    # @param sheet_name_in_excelx [String] key used for this sheet in the result
    def self.parse_xml(sheet_name_in_xml, sheet_name_in_excelx)
      @sheet_name = sheet_name_in_excelx
      sheet_xml_file = "#{@tmp_folder}/xl/worksheets/#{sheet_name_in_xml}.xml"
      rows = File.open(sheet_xml_file, "rb") { |file| leading_rows(file) }
      @doc = Nokogiri.XML(rows)
    end

    # Collects the values of the first parsed row into @headers and stores the
    # number of parsed rows in @row_count.
    #
    # @return [Array, nil] the header values; nil when the sheet has no rows
    def self.fetch_headers
      @headers = []
      rows = @doc.xpath("//*[local-name()='row']")
      @row_count = rows.count
      return if @row_count < 1
      rows.first.element_children.each do |cell|
        @headers << (cell['t'] == "s" ? @shared_strings[cell.content.to_i] : cell.content)
      end
      @headers
    end

  end
end
314
+
315
+
316
+
@@ -0,0 +1,3 @@
1
module ExcelxPreview #:nodoc:
  # Gem version, read by the gemspec. Frozen so it cannot be mutated at runtime.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,25 @@
1
require "rspec"

require_relative 'spec_helper'
require_relative '../lib/excelx_preview'

# Preview expected for the data/formulas.xlsx fixture, keyed by sheet name,
# then "Row<n>", then cell reference.
test_hash = {"sheet1"=>
               {"Row1"=>
                  {"A1"=>"01:25:30 PM",
                   "B1"=>"0.154251449887584",
                   "C1"=>"150",
                   "D1"=>"2012-01-28",
                   "E1"=>"12/25/2012 12:12:30 PM",
                   "F1"=>"12/03/2012",
                   "G1"=>"01:00:45 AM"},
                "Row2"=>{"A2"=>"1", "B2"=>"2", "C2"=>"3", "E2"=>"4", "F2"=>"5", "G2"=>"6"}
               },
             "sheet2"=>{"Row1"=>{"A1"=>nil, "D1"=>nil, "E1"=>nil, "F1"=>nil, "G1"=>nil}},
             "sheet3"=>{}}

describe ExcelX do
  describe "#preview" do
    it "it should preview first 10 lines of each sheets" do
      # expect-style matcher instead of the deprecated `should ==` monkey patch.
      expect(ExcelX::Previewer.preview("data/formulas.xlsx")).to eq(test_hash)
    end
  end
end
@@ -0,0 +1,8 @@
1
require "simplecov"
require "simplecov-rcov"

# The rcov formatter is configured whether or not coverage is collected.
SimpleCov.formatter = SimpleCov::Formatter::RcovFormatter

# Coverage collection is opt-in through the COVERAGE environment variable.
if ENV["COVERAGE"]
  SimpleCov.start do
    # Ignore bundled gems and the specs themselves.
    add_filter "vendor"
    add_filter "spec"
  end
end
metadata ADDED
@@ -0,0 +1,111 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: excelx_preview
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Krishnaprasad T Nair
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-11-26 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: activesupport
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: uuid
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ description: Gem that extracts first 10 rows from an excelx file.Supports simple formulas.
63
+ email:
64
+ - kpisgod@gmail.com
65
+ executables: []
66
+ extensions: []
67
+ extra_rdoc_files: []
68
+ files:
69
+ - .gitignore
70
+ - Gemfile
71
+ - README.md
72
+ - examples/Gemfile
73
+ - examples/spreadsheet.rb
74
+ - excelx_preview.gemspec
75
+ - lib/excelx_preview.rb
76
+ - lib/excelx_preview/excelx.rb
77
+ - lib/excelx_preview/version.rb
78
+ - spec/excelx_spec.rb
79
+ - spec/spec_helper.rb
80
+ homepage: ''
81
+ licenses: []
82
+ post_install_message:
83
+ rdoc_options: []
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ none: false
88
+ requirements:
89
+ - - ! '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ segments:
93
+ - 0
94
+ hash: 674781492701308248
95
+ required_rubygems_version: !ruby/object:Gem::Requirement
96
+ none: false
97
+ requirements:
98
+ - - ! '>='
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ segments:
102
+ - 0
103
+ hash: 674781492701308248
104
+ requirements: []
105
+ rubyforge_project:
106
+ rubygems_version: 1.8.21
107
+ signing_key:
108
+ specification_version: 3
109
+ summary: Gem that extracts first 10 rows from an excelx file
110
+ test_files: []
111
+ has_rdoc: