extract 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. data/.document +5 -0
  2. data/.lre +1 -0
  3. data/.rspec +1 -0
  4. data/Gemfile +27 -0
  5. data/Gemfile.lock +108 -0
  6. data/Guardfile +27 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.rdoc +19 -0
  9. data/Rakefile +49 -0
  10. data/VERSION +1 -0
  11. data/extract.gemspec +128 -0
  12. data/lib/extract.rb +39 -0
  13. data/lib/extract/excel_formulas.rb +44 -0
  14. data/lib/extract/formula.treetop +66 -0
  15. data/lib/extract/math.treetop +33 -0
  16. data/lib/extract/math_calc.rb +111 -0
  17. data/lib/extract/parser.rb +90 -0
  18. data/lib/extract/persist/sheet.rb +26 -0
  19. data/lib/extract/sheet.rb +85 -0
  20. data/lib/extract/sheet_comp.rb +7 -0
  21. data/lib/extract/sheet_definition.rb +127 -0
  22. data/lib/extract/tree/base.rb +7 -0
  23. data/lib/extract/tree/cell.rb +33 -0
  24. data/lib/extract/tree/cond_exp.rb +25 -0
  25. data/lib/extract/tree/formula.rb +24 -0
  26. data/lib/extract/tree/formula_args.rb +30 -0
  27. data/lib/extract/tree/math.rb +106 -0
  28. data/lib/extract/tree/num.rb +18 -0
  29. data/lib/extract/tree/operator.rb +9 -0
  30. data/lib/extract/tree/range.rb +58 -0
  31. data/lib/extract/tree/string.rb +12 -0
  32. data/samples/baseball.xlsx +0 -0
  33. data/samples/div.xlsx +0 -0
  34. data/spec/config/mongoid.yml +6 -0
  35. data/spec/deps_spec.rb +48 -0
  36. data/spec/extract_spec.rb +44 -0
  37. data/spec/math_spec.rb +52 -0
  38. data/spec/parser_spec.rb +145 -0
  39. data/spec/persist_spec.rb +34 -0
  40. data/spec/sheet_definition_spec.rb +46 -0
  41. data/spec/sheet_spec.rb +51 -0
  42. data/spec/spec_helper.rb +68 -0
  43. data/vol/excel_test.rb +55 -0
  44. data/vol/parse_test.rb +8 -0
  45. data/vol/scratch.rb +61 -0
  46. data/vol/web.rb +0 -0
  47. data/vol/yaml_test.rb +4 -0
  48. data/web/file.tmp +0 -0
  49. data/web/file.xlsx +0 -0
  50. data/web/main.rb +59 -0
  51. data/web/mongoid.yml +6 -0
  52. data/web/views/index.haml +39 -0
  53. data/web/views/upload.haml +13 -0
  54. metadata +311 -0
@@ -0,0 +1,66 @@
1
# Treetop grammar for Excel-style formula strings.
# Arithmetic rules (math_exp_full, math_exp, op, ...) come from the included MathMy grammar.
+ grammar Formula
2
+ include MathMy
3
+
4
# Entry rule: a literal "=" followed by the formula body, labelled `meat`.
# The alternatives are tried in the order written (PEG ordered choice).
+ rule top
5
+ "=" meat:(cond / range / math_exp_full / formula / cell / num)
6
+ end
7
+
8
# Operand of an arithmetic expression: number, cell reference or function call.
# NOTE(review): MathMy also defines `primary` (as `num` only); this definition
# presumably takes precedence when parsing with the Formula grammar - confirm.
+ rule primary
9
+ num / cell / formula
10
+ end
11
+
12
+
13
+
14
# Cell reference: optional "-", optional "$" before the column and before the row,
# exactly one upper-case column letter (c) and one or more row digits (r).
+ rule cell
15
+ "-"? "$"? c:[A-Z] "$"? r:[0-9]+ <Extract::Tree::Cell>
16
+ end
17
+
18
# Range of cells written as two cell references separated by ":" (labels a and b).
+ rule range
19
+ a:cell ":" b:cell <Extract::Tree::Range>
20
+ end
21
+
22
# `range` is tried before `cell` so that "A1:B2" is not consumed as the cell "A1" alone.
+ rule cell_or_range
23
+ range / cell
24
+ end
25
+
26
# Numeric literal: optional "-", integer digits, optional "." plus fraction digits.
+ rule num
27
+ "-"? [0-9]+ ("." [0-9]+)? <Extract::Tree::Num>
28
+ end
29
+
30
+
31
# Function call: a known function name followed by a parenthesised argument list.
+ rule formula
32
+ formula_name "(" formula_args ")" <Extract::Tree::Formula>
33
+ end
34
+
35
# Closed list of function names this grammar recognises.
+ rule formula_name
36
+ "THING" / "DOUBLE" / "SUM" / "IF" / "MAX" / "VLOOKUP" / "SQRT" / "COMBIN"
37
+ end
38
+
39
# One function argument; alternatives are tried in the order written.
+ rule formula_arg
40
+ cond / math_exp_full / formula / range / cell / num / string
41
+ end
42
+
43
# Comma-separated argument list: the first argument, then zero or more
# "," + argument pairs collected under `rest` (each argument labelled `fa`).
+ rule formula_args
44
+ formula_arg rest:("," fa:formula_arg)* <Extract::Tree::FormulaArgs>
45
+ end
46
+
47
# Comparison `a op b`. The two-character operators ">=" and "<=" are listed
# before "=", ">" and "<" so they are tried first.
+ rule cond_exp
48
+ a:(num / cell / string / formula / math_exp_full) op:(">=" / "<=" / "=" / ">" / "<") b:(num / cell / string / formula / math_exp_full) <Extract::Tree::CondExp>
49
+ end
50
+
51
# Double-quoted string literal containing at least one non-quote character
# (an empty "" does not match this rule).
+ rule string
52
+ "\"" [^"]+ "\"" <Extract::Tree::String>
53
+ end
54
+
55
# Boolean constant; the inline excel_value returns true for "TRUE" and false otherwise.
+ rule cond_const
56
+ ("TRUE" / "FALSE") {
57
+ def excel_value
58
+ (text_value == "TRUE") ? true : false
59
+ end
60
+ }
61
+ end
62
+
63
# A condition is either a comparison or a boolean constant.
+ rule cond
64
+ cond_exp / cond_const
65
+ end
66
+ end
@@ -0,0 +1,33 @@
1
# Treetop grammar for arithmetic expressions built from operands, the
# operators + - * / ^ and parentheses.
+ grammar MathMy
2
# Expression that contains at least one operator: the `+` repetition on `rest`
# means a lone operand does not match this rule.
+ rule math_exp_full
3
+ (paren_exp / primary) rest:(op math_exp)+ <Extract::Tree::Math>
4
+ end
5
+
6
# Operand followed by zero or more (operator, expression) pairs, without parentheses
# around the leading operand.
+ rule naked_exp
7
+ primary rest:(op math_exp)* <Extract::Tree::Math>
8
+ end
9
+
10
# Parenthesised sub-expression.
+ rule paren_exp
11
+ "(" math_exp ")" <Extract::Tree::ParenMath>
12
+ end
13
+
14
# General expression: a parenthesised or naked expression followed by zero or
# more (operator, expression) pairs.
+ rule math_exp
15
+ (paren_exp / naked_exp) rest:(op math_exp)* <Extract::Tree::Math>
16
+ end
17
+
18
+
19
+
20
+
21
# Numeric literal: optional "-", integer digits, optional "." plus fraction digits.
+ rule num
22
+ "-"? [0-9]+ ("." [0-9]+)? <Extract::Tree::Num>
23
+ end
24
+
25
# Binary operator token.
+ rule op
26
+ ("+" / "-" / "*" / "/" / "^") <Extract::Tree::Operator>
27
+ end
28
+
29
# Operand of an expression; in this grammar only a number.
+ rule primary
30
+ num
31
+ end
32
+ end
33
+
@@ -0,0 +1,111 @@
1
+
2
+
3
+ module Extract
4
# One token of an arithmetic expression: either an operand (its value is held
# in +str+) or one of the operators + - * / ^.
#
# Operators are applied with plain numeric method calls rather than by
# building a string and eval-ing it, so cell contents can never be executed
# as Ruby code.
class MathWrapper
  include FromHash
  attr_accessor :str

  # Binding strength of each operator; a higher number binds tighter.
  PRECEDENCE = {"*" => 10, "/" => 10, "+" => 5, "-" => 5, "^" => 15}.freeze
  INTEGER_LITERAL = /\A-?\d+\z/
  # Also accepts the exponent form produced by Float#to_s, e.g. "1.0e-05".
  FLOAT_LITERAL = /\A-?\d+(?:\.\d+)?(?:e[-+]?\d+)?\z/i

  # Every operator, including "^", is treated as left associative.
  def left_associative?
    true
  end

  # @return [Boolean] true when +str+ is one of the supported operators
  def operator?
    PRECEDENCE.key?(str)
  end

  # @return [Integer, nil] precedence of this operator, nil for operands
  def precedence
    PRECEDENCE[str]
  end

  # Applies this operator to two operands.
  #
  # @param l [#str, Numeric, nil] left operand (a wrapper or an earlier result)
  # @param r [#str, Numeric, nil] right operand
  # @return [Numeric] the result, or 0 when either operand is not a finite number
  # @raise [RuntimeError] when this token is not an operator
  def apply(l, r)
    raise "bad apply, not an operator" unless operator?

    # Blank operands (e.g. empty cells) count as zero.
    l.str = "0" if l.respond_to?(:str) && l.str.blank?
    r.str = "0" if r.respond_to?(:str) && r.str.blank?

    left = numeric_operand(l)
    right = numeric_operand(r)
    # Text, booleans and infinities cannot take part in arithmetic; the
    # established fallback for those is a result of 0.
    return 0 if left.nil? || right.nil?

    # Integer operands keep Ruby integer semantics (1 / 2 == 0), as before.
    left.public_send(str == "^" ? :** : str, right)
  end

  def to_s
    str
  end

  private

  # Converts an operand into a number.
  # Returns nil when the operand is not a finite number.
  def numeric_operand(operand)
    value = operand.respond_to?(:str) ? operand.str : operand
    return nil if value.is_a?(Float) && !value.finite?
    return value if value.is_a?(Numeric)

    text = value.to_s.strip
    return 0 if text.empty?
    # Base-10 parse: "010" is ten, not octal eight.
    return text.to_i if text =~ INTEGER_LITERAL

    if text =~ FLOAT_LITERAL
      float = text.to_f
      return float.finite? ? float : nil
    end
    nil
  end
end
43
+
44
# Evaluates flat infix token lists: converts them to reverse Polish notation
# with the shunting-yard algorithm and then reduces the RPN list.
#
# Tokens are duck-typed and must respond to +operator?+; operators also need
# +precedence+, +left_associative?+ and +apply(left, right)+.
# Every instance method can also be called on the class itself
# (e.g. +MathCalc.parse_eval(nodes)+); such calls run on a fresh instance.
class MathCalc
  # Reorders infix tokens into reverse Polish notation.
  # Parentheses are not handled here; the input must already be flat.
  #
  # @param input [Array] tokens in infix order
  # @return [Array] the same tokens in RPN order
  def shunting_yard(input)
    output = []
    # Operators wait here until an operator that binds less tightly arrives.
    operator_stack = []

    input.each do |token|
      if token.operator?
        output << operator_stack.pop while yields_to?(token, operator_stack.last)
        operator_stack << token
      else
        output << token
      end
    end

    # Remaining operators are emitted from the top of the stack down.
    output.concat(operator_stack.reverse)
  end

  # String-based variant: wraps raw strings, reorders them, and returns strings.
  #
  # @param input [Array<String>] infix tokens such as %w(1 + 2 * 3)
  # @return [Array<String>] tokens in RPN order
  def shunting_yard_old(input)
    wrapped = input.map { |x| MathWrapper.new(:str => x) }
    # Previously called the non-existent #shunting_yard_inner and always raised.
    shunting_yard(wrapped).map { |x| x.str }
  end

  # Evaluates a token list that is already in RPN order.
  #
  # @param input [Array] RPN tokens
  # @return [Object] the value left on the stack (the lone token itself when
  #   the input contains no operator)
  def rpn(input)
    results = []
    input.each do |object|
      if object.operator?
        # The right operand was pushed last, so it is popped first.
        r, l = results.pop, results.pop
        results << object.apply(l, r)
      else
        results << object
      end
    end
    results.first
  end

  # Evaluates parse-tree nodes given in infix order.
  #
  # @param input [Array<#text_value>] nodes; a node's +excel_value+ is used
  #   when it responds to it, otherwise its +text_value+
  # @return [Object] the evaluated result
  # @raise [StandardError] re-raises any evaluation error after writing the
  #   offending tokens to stderr
  def parse_eval(input)
    tokens = input.map do |node|
      MathWrapper.new(:str => (node.respond_to?(:excel_value) ? node.excel_value : node.text_value))
    end
    ordered = shunting_yard(tokens)
    begin
      rpn(ordered)
    rescue StandardError
      # Diagnostics go to stderr so they do not pollute a caller's stdout.
      warn input.map { |x| x.text_value }.inspect
      warn ordered.inspect
      raise
    end
  end

  class << self
    # Forwards class-level calls to a fresh instance.
    def method_missing(sym, *args, &b)
      return super unless public_method_defined?(sym)
      new.send(sym, *args, &b)
    end

    # Keeps respond_to? truthful for the forwarded methods.
    def respond_to_missing?(sym, include_private = false)
      public_method_defined?(sym) || super
    end
  end

  private

  # True when +stacked+ (the operator on top of the stack) must be emitted
  # before +incoming+ is pushed.
  def yields_to?(incoming, stacked)
    return false unless stacked
    if incoming.left_associative?
      incoming.precedence <= stacked.precedence
    else
      incoming.precedence < stacked.precedence
    end
  end
end
111
+ end
@@ -0,0 +1,90 @@
1
+
2
+
3
+ #Treetop.load "lib/extract/math"
4
+ #Treetop.load "lib/extract/formula"
5
+
6
# Compile and load the Treetop grammars stored next to this file.
# "math" is loaded before "formula", matching the original load order.
grammar_dir = File.expand_path(File.dirname(__FILE__))
%w(math formula).each do |grammar_name|
  Treetop.load grammar_dir + "/#{grammar_name}"
end
15
+
16
+
17
+
18
+
19
# NOTE(review): this reopens Object, so every object in the process gains
# +root_sheet+ and +find_sheet+. It looks intended for parse-tree nodes only;
# consider moving it to the node base class once all callers are known.
class Object
  # Sheet attached to the root of a parse tree (set by the parser).
  attr_accessor :root_sheet

  # Finds the sheet this object belongs to by walking up the +parent+ chain
  # until an object with a +root_sheet+ is found.
  #
  # @return [Object] the first non-nil +root_sheet+ on the way up
  # @raise [RuntimeError] "can't find sheet" when the chain ends without one,
  #   including when the object has no +parent+ method at all
  def find_sheet
    return root_sheet if root_sheet

    # Objects without a #parent method used to fail with NameError here.
    ancestor = respond_to?(:parent, true) ? parent : nil
    raise "can't find sheet" unless ancestor
    ancestor.find_sheet
  end
end
32
+
33
module Extract
  # Parses one formula string (e.g. "=SUM(A1:A3)") and evaluates it, or lists
  # the cells it references, in the context of +sheet+.
  class Parser
    include FromHash
    attr_accessor :str, :sheet

    # Parses +str+ with FormulaParser.
    #
    # @return [Object] the root parse node (never nil)
    # @raise [RuntimeError] with the parser's failure reason, line and column
    #   when +str+ does not parse
    def result
      parser = FormulaParser.new
      tree = parser.parse(compact_formula)
      return tree if tree

      details = [
        parser.failure_reason,
        parser.failure_line,
        parser.failure_column,
        "no result for #{str}"
      ]
      raise details.join("\n")
    end

    # @return [Object] the value the formula evaluates to
    # @raise [RuntimeError] when +str+ does not parse
    def excel_value
      rooted_result.meat.excel_value
    end

    # Cells referenced by the formula.
    #
    # @return [Array<String>] sorted, unique references with "$" markers
    #   removed; entries containing a double quote (string literals) are dropped
    # @raise [RuntimeError] when +str+ does not parse
    def deps
      references = rooted_result.meat.deps.flatten.uniq
      cell_refs = references.reject { |ref| ref =~ /"/ }
      cell_refs.map { |ref| ref.gsub("$", "") }.sort.uniq
    end

    private

    # Parse tree with the sheet attached to its root, so that nodes can reach
    # the sheet through find_sheet.
    def rooted_result
      tree = result
      tree.root_sheet = sheet
      tree
    end

    # The grammar has no whitespace rules, so spaces are removed before
    # parsing. Spaces inside double-quoted string literals are kept; removing
    # them would change the literal's value.
    def compact_formula
      str.gsub(/"[^"]*"| /) { |piece| piece == " " ? "" : piece }
    end
  end
end
74
+
75
+
76
+
77
+
78
+
79
+
80
+
81
+
82
+
83
+
84
+
85
+
86
+
87
+
88
+
89
+
90
+
@@ -0,0 +1,26 @@
1
module Extract
  module Persist
    # Mongoid document that stores a sheet's raw cells together with the
    # names of its input and output cells.
    class Sheet
      include Mongoid::Document

      field :cells, :type => Hash

      field :input_cells, :type => Array
      field :output_cells, :type => Array

      # Rebuilds an in-memory sheet definition from the stored fields.
      # A document whose +cells+ or +output_cells+ were never set is treated
      # as empty instead of raising NoMethodError on nil.
      #
      # @return [Extract::SheetDefinition]
      def sheet_def
        sheet = Extract::Sheet.new
        (cells || {}).each do |name, content|
          sheet.cells[name] = content
        end

        Extract::SheetDefinition.new(:sheet => sheet, :output_cells => output_cells || [])
      end
    end
  end
end
@@ -0,0 +1,85 @@
1
module Extract
  # In-memory spreadsheet: maps cell names ("A1") to raw contents and
  # evaluates contents that start with "=" as formulas on demand.
  class Sheet
    include FromHash
    # Raw cell contents keyed by cell name.
    fattr(:cells) { {} }
    # Evaluated formula results keyed by cell name.
    fattr(:cache) { {} }
    # Values as read from the source workbook, kept for comparison.
    fattr(:loaded_values) { {} }

    # Stores raw content for a cell.
    # Cached results are dropped because any formula may depend on this cell.
    def []=(c, val)
      clear_cache!
      cells[c] = val
    end

    # @return [Object] the evaluated value for formula cells, otherwise the
    #   raw content (nil for unknown cells)
    def [](c)
      raw = cells[c]
      return raw unless formula?(raw)

      # fetch (rather than ||=) so that false and nil results are cached too.
      cache.fetch(c) do
        cache[c] = Extract::Parser.new(:str => raw, :sheet => self).excel_value
      end
    end

    def clear_cache!
      self.cache = {}
    end

    # Evaluates an ad-hoc formula string against this sheet.
    # NOTE: this intentionally shadows Kernel#eval for Sheet instances.
    def eval(str)
      Extract::Parser.new(:str => str, :sheet => self).excel_value
    end

    # Leaf cells that cell +c+ depends on, directly or through other formulas.
    #
    # @param c [String] cell name
    # @param path [Array<String>] cells currently being resolved; used only
    #   to detect circular references, callers normally omit it
    # @return [Array<String>] sorted, unique cell names without "$" markers
    # @raise [RuntimeError] on a circular reference (previously this recursed
    #   until the stack overflowed)
    def deps(c, path = [])
      if path.include?(c)
        raise "circular reference: #{(path + [c]).join(' -> ')}"
      end

      raw = cells[c]
      return [] unless formula?(raw)

      direct = Extract::Parser.new(:str => raw, :sheet => self).deps
      leaves = direct.map do |dep|
        nested = deps(dep, path + [c])
        # A dependency with no dependencies of its own is itself a leaf.
        nested.empty? ? dep : nested
      end

      names = leaves.flatten.uniq.reject { |name| name =~ /"/ }
      names.map { |name| name.gsub("$", "") }.sort.uniq
    end

    # Yields cell name, raw content, evaluated value and originally loaded
    # value for every cell that had a value in the source workbook.
    def each_value_comp
      loaded_values.each do |k, v|
        yield k, cells[k], self[k], v
      end
    end

    class << self
      # Loads the first worksheet of an .xlsx file.
      # Only columns A..Z and rows 1..100 are read.
      #
      # @param file [String] path to the workbook
      # @return [Extract::Sheet]
      def load(file)
        w = Roo::Excelx.new(file)
        w.default_sheet = w.sheets.first

        sheet = Extract::Sheet.new

        ("A".."Z").each do |col|
          (1..100).each do |row|
            name = "#{col}#{row}"
            loaded = w.cell(row, col)
            val = if w.formula?(row, col)
              "=" + strip_formula_spaces(w.formula(row, col))
            else
              loaded
            end
            sheet[name] = val if val.present?
            sheet.loaded_values[name] = loaded if loaded.present?
          end
        end

        sheet
      end

      private

      # Removes spaces from a formula but keeps those inside double-quoted
      # string literals, which are part of the literal's value.
      def strip_formula_spaces(formula)
        formula.gsub(/"[^"]*"| /) { |piece| piece == " " ? "" : piece }
      end
    end

    private

    # Content is a formula when its string form starts with "=".
    # (The former /^=/ test also matched "=" after a newline inside the text.)
    def formula?(content)
      content.to_s.start_with?("=")
    end
  end
end
@@ -0,0 +1,7 @@
1
module Extract
  # Holder for the filename of a sheet; defines no behaviour of its own.
  class SheetComp
    include FromHash

    attr_accessor(:filename)
  end
end
@@ -0,0 +1,127 @@
1
module Extract
  # Describes how a sheet is used: which cells are outputs, which input cells
  # those outputs depend on, and the label found to the left of each of them.
  class SheetDefinition
    include FromHash
    attr_accessor :sheet

    COLUMN_LETTERS = ("A".."Z").to_a.freeze

    # Column letter immediately before +letter+.
    #
    # @return [String, nil] nil for "A", which has no column to its left
    #   (this used to wrap around to "Z")
    # @raise [RuntimeError] when +letter+ is not a single letter A-Z
    def prev_letter(letter)
      index = COLUMN_LETTERS.index(letter)
      raise "bad letter #{letter}" unless index
      index.zero? ? nil : COLUMN_LETTERS[index - 1]
    end

    # Name of the cell directly left of +c+, or nil when +c+ is in column A.
    # Only single-letter columns are supported.
    def left(c)
      col = prev_letter(c[0..0])
      col && "#{col}#{c[1..-1]}"
    end

    # Maps each input/output cell to the value of the cell on its left, which
    # is used as that cell's label; nil when there is no cell to the left.
    fattr(:cell_names) do
      (input_cells + output_cells).each_with_object({}) do |c, names|
        label_cell = left(c)
        names[c] = label_cell && sheet[label_cell]
      end
    end

    fattr(:output_cells) { [] }

    # Sets the output cells; range strings such as "A1:A3" are expanded into
    # individual cell names. nil is treated as an empty list.
    def output_cells=(arr)
      @output_cells = Array(arr).map do |c|
        c =~ /:/ ? Extract::Tree::Range.cells_in_range(c) : c
      end.flatten
    end

    # Maps each output cell to the sorted list of cells it depends on.
    fattr(:dep_map) do
      output_cells.each_with_object({}) do |output_cell, map|
        found = sheet.deps(output_cell).flatten.uniq
        cell_refs = found.reject { |c| c =~ /"/ }
        map[output_cell] = cell_refs.map { |c| c.gsub("$", "") }.sort.uniq
      end
    end

    # Cells the outputs depend on. An output without dependencies counts as
    # its own input.
    fattr(:input_cells) do
      output_cells.map do |c|
        found = dep_map[c] || []
        found.empty? ? [c] : found
      end.flatten.uniq.sort
    end

    # Copies cells, input cells and output cells onto a persistence record.
    #
    # @param res [Persist::Sheet, nil] record to fill; a new one is built when
    #   omitted (the nil default used to raise NoMethodError)
    # @return [Persist::Sheet] the populated, unsaved record
    def setup_persisted_sheet!(res=nil)
      res ||= Persist::Sheet.new
      res.cells = sheet.cells.each_with_object({}) { |(k, v), copy| copy[k] = v }
      res.input_cells = input_cells.dup
      res.output_cells = output_cells.dup
      res
    end

    # Populates and saves a persistence record.
    #
    # @param res [Persist::Sheet, nil] record to reuse, or nil for a new one
    # @return [Persist::Sheet] the saved record
    def save!(res=nil)
      res ||= Persist::Sheet.new
      setup_persisted_sheet! res
      res.save!
      res
    end

    # Evaluated value of cell +c+ (delegates to the sheet).
    def [](c)
      sheet[c]
    end

    # Yields cell name, label and value for every input cell.
    def each_input
      input_cells.each do |cell|
        yield cell, cell_names[cell], sheet[cell]
      end
    end

    # Yields cell name, label, value, dependencies and raw content for every
    # output cell, in sorted order.
    def each_output
      output_cells.sort.each do |cell|
        yield cell, cell_names[cell], sheet[cell], dep_map[cell], sheet.cells[cell]
      end
    end

    # Yields cell name, raw content and dependencies for every cell that is
    # neither an input nor an output but has content and at least one dependency.
    def each_other
      known = input_cells + output_cells
      others = sheet.cells.keys.reject { |k| known.include?(k) }

      others.each do |c|
        d = sheet.deps(c)
        yield c, sheet.cells[c], d if sheet.cells[c].present? && d.size > 0
      end
    end

    class << self
      # Builds a definition from a workbook file and a list of output cells.
      #
      # @param file [String] path passed to Sheet.load
      # @param output [Array<String>] output cell names and/or ranges
      # @return [Extract::SheetDefinition]
      def load(file,output)
        res = new
        res.sheet = Sheet.load(file)
        res.output_cells = output
        res
      end
    end
  end
end