extract 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. data/.document +5 -0
  2. data/.lre +1 -0
  3. data/.rspec +1 -0
  4. data/Gemfile +27 -0
  5. data/Gemfile.lock +108 -0
  6. data/Guardfile +27 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.rdoc +19 -0
  9. data/Rakefile +49 -0
  10. data/VERSION +1 -0
  11. data/extract.gemspec +128 -0
  12. data/lib/extract.rb +39 -0
  13. data/lib/extract/excel_formulas.rb +44 -0
  14. data/lib/extract/formula.treetop +66 -0
  15. data/lib/extract/math.treetop +33 -0
  16. data/lib/extract/math_calc.rb +111 -0
  17. data/lib/extract/parser.rb +90 -0
  18. data/lib/extract/persist/sheet.rb +26 -0
  19. data/lib/extract/sheet.rb +85 -0
  20. data/lib/extract/sheet_comp.rb +7 -0
  21. data/lib/extract/sheet_definition.rb +127 -0
  22. data/lib/extract/tree/base.rb +7 -0
  23. data/lib/extract/tree/cell.rb +33 -0
  24. data/lib/extract/tree/cond_exp.rb +25 -0
  25. data/lib/extract/tree/formula.rb +24 -0
  26. data/lib/extract/tree/formula_args.rb +30 -0
  27. data/lib/extract/tree/math.rb +106 -0
  28. data/lib/extract/tree/num.rb +18 -0
  29. data/lib/extract/tree/operator.rb +9 -0
  30. data/lib/extract/tree/range.rb +58 -0
  31. data/lib/extract/tree/string.rb +12 -0
  32. data/samples/baseball.xlsx +0 -0
  33. data/samples/div.xlsx +0 -0
  34. data/spec/config/mongoid.yml +6 -0
  35. data/spec/deps_spec.rb +48 -0
  36. data/spec/extract_spec.rb +44 -0
  37. data/spec/math_spec.rb +52 -0
  38. data/spec/parser_spec.rb +145 -0
  39. data/spec/persist_spec.rb +34 -0
  40. data/spec/sheet_definition_spec.rb +46 -0
  41. data/spec/sheet_spec.rb +51 -0
  42. data/spec/spec_helper.rb +68 -0
  43. data/vol/excel_test.rb +55 -0
  44. data/vol/parse_test.rb +8 -0
  45. data/vol/scratch.rb +61 -0
  46. data/vol/web.rb +0 -0
  47. data/vol/yaml_test.rb +4 -0
  48. data/web/file.tmp +0 -0
  49. data/web/file.xlsx +0 -0
  50. data/web/main.rb +59 -0
  51. data/web/mongoid.yml +6 -0
  52. data/web/views/index.haml +39 -0
  53. data/web/views/upload.haml +13 -0
  54. metadata +311 -0
@@ -0,0 +1,66 @@
1
+ grammar Formula
2
+ include MathMy
3
+
4
+ rule top
5
+ "=" meat:(cond / range / math_exp_full / formula / cell / num)
6
+ end
7
+
8
+ rule primary
9
+ num / cell / formula
10
+ end
11
+
12
+
13
+
14
+ rule cell
15
+ "-"? "$"? c:[A-Z] "$"? r:[0-9]+ <Extract::Tree::Cell>
16
+ end
17
+
18
+ rule range
19
+ a:cell ":" b:cell <Extract::Tree::Range>
20
+ end
21
+
22
+ rule cell_or_range
23
+ range / cell
24
+ end
25
+
26
+ rule num
27
+ "-"? [0-9]+ ("." [0-9]+)? <Extract::Tree::Num>
28
+ end
29
+
30
+
31
+ rule formula
32
+ formula_name "(" formula_args ")" <Extract::Tree::Formula>
33
+ end
34
+
35
+ rule formula_name
36
+ "THING" / "DOUBLE" / "SUM" / "IF" / "MAX" / "VLOOKUP" / "SQRT" / "COMBIN"
37
+ end
38
+
39
+ rule formula_arg
40
+ cond / math_exp_full / formula / range / cell / num / string
41
+ end
42
+
43
+ rule formula_args
44
+ formula_arg rest:("," fa:formula_arg)* <Extract::Tree::FormulaArgs>
45
+ end
46
+
47
+ rule cond_exp
48
+ a:(num / cell / string / formula / math_exp_full) op:(">=" / "<=" / "=" / ">" / "<") b:(num / cell / string / formula / math_exp_full) <Extract::Tree::CondExp>
49
+ end
50
+
51
+ rule string
52
+ "\"" [^"]+ "\"" <Extract::Tree::String>
53
+ end
54
+
55
+ rule cond_const
56
+ ("TRUE" / "FALSE") {
57
+ def excel_value
58
+ (text_value == "TRUE") ? true : false
59
+ end
60
+ }
61
+ end
62
+
63
+ rule cond
64
+ cond_exp / cond_const
65
+ end
66
+ end
@@ -0,0 +1,33 @@
1
+ grammar MathMy
2
+ rule math_exp_full
3
+ (paren_exp / primary) rest:(op math_exp)+ <Extract::Tree::Math>
4
+ end
5
+
6
+ rule naked_exp
7
+ primary rest:(op math_exp)* <Extract::Tree::Math>
8
+ end
9
+
10
+ rule paren_exp
11
+ "(" math_exp ")" <Extract::Tree::ParenMath>
12
+ end
13
+
14
+ rule math_exp
15
+ (paren_exp / naked_exp) rest:(op math_exp)* <Extract::Tree::Math>
16
+ end
17
+
18
+
19
+
20
+
21
+ rule num
22
+ "-"? [0-9]+ ("." [0-9]+)? <Extract::Tree::Num>
23
+ end
24
+
25
+ rule op
26
+ ("+" / "-" / "*" / "/" / "^") <Extract::Tree::Operator>
27
+ end
28
+
29
+ rule primary
30
+ num
31
+ end
32
+ end
33
+
@@ -0,0 +1,111 @@
1
+
2
+
3
+ module Extract
4
+ class MathWrapper
5
+ include FromHash
6
+ attr_accessor :str
7
+
8
+ def left_associative?
9
+ true
10
+ end
11
+ def operator?
12
+ %w(+ - / * ^).include?(str)
13
+ end
14
+ def precedence
15
+ h = {"*" => 10, "/" => 10, "+" => 5, "-" => 5, "^" => 15}
16
+ h[str]
17
+ end
18
+ def apply(l,r)
19
+ #puts "apply call #{l} #{str} #{r}"
20
+ raise "bad apply, not an operator" unless operator?
21
+
22
+ #raise "bad apply, L #{l} R #{r}" unless l.to_s.present? && r.to_s.present?
23
+
24
+ l.str = "0" if l.respond_to?(:str) && l.str.blank?
25
+ r.str = "0" if r.respond_to?(:str) && r.str.blank?
26
+
27
+ op = str
28
+ op = "**" if op == "^"
29
+
30
+ exp = "#{l.to_s} #{op} #{r.to_s}"
31
+ return 0 if exp =~ /infinity/i || exp =~ /[a-z]/i
32
+ #puts "evaling #{exp}"
33
+ #puts "eval, L #{l.class} #{l.inspect} #{str} R #{r.inspect}"
34
+ raise exp if exp =~ /[a-z]/i
35
+ res = eval(exp)
36
+ #puts "evaled #{exp} to #{res}"
37
+ res
38
+ end
39
+ def to_s
40
+ str
41
+ end
42
+ end
43
+
44
+ class MathCalc
45
+ def shunting_yard(input)
46
+ [].tap do |rpn|
47
+
48
+ # where I store operators before putting them onto final rpn list
49
+ operator_stack = []
50
+
51
+ input.each do |object|
52
+
53
+ if object.operator?
54
+ op1 = object
55
+
56
+ # while we have an operator on the temp stack
57
+ # and that op on the temp stack has a higher precedence than the current op
58
+ while (op2 = operator_stack.last) && (op1.left_associative? ? op1.precedence <= op2.precedence : op1.precedence < op2.precedence)
59
+ rpn << operator_stack.pop
60
+ end
61
+ operator_stack << op1
62
+ else
63
+ rpn << object
64
+ end
65
+ end
66
+ rpn << operator_stack.pop until operator_stack.empty?
67
+ end
68
+ end
69
+
70
+ def shunting_yard_old(input)
71
+ input = input.map { |x| MathWrapper.new(:str => x) }
72
+ res = shunting_yard_inner(input)
73
+ res.map { |x| x.str }
74
+ end
75
+
76
+ def rpn(input)
77
+ results = []
78
+ input.each do |object|
79
+ if object.operator?
80
+ r, l = results.pop, results.pop
81
+ results << object.apply(l, r)
82
+ else
83
+ results << object
84
+ end
85
+ end
86
+ results.first
87
+ end
88
+
89
+ def parse_eval(input)
90
+ raw_input = input
91
+ #raise input.map { |x| x.text_value }.inspect
92
+ input = input.map { |x| MathWrapper.new(:str => (x.respond_to?(:excel_value) ? x.excel_value : x.text_value)) }
93
+ #input = input.split(" ") if input.kind_of?(String)
94
+ res = shunting_yard(input)
95
+ #puts "before rpn #{res.inspect}"
96
+ begin
97
+ res = rpn(res)
98
+ rescue => exp
99
+ puts raw_input.map { |x| x.text_value }.inspect
100
+ puts res.inspect
101
+ raise exp
102
+ end
103
+ end
104
+
105
+ class << self
106
+ def method_missing(sym,*args,&b)
107
+ new.send(sym,*args,&b)
108
+ end
109
+ end
110
+ end
111
+ end
@@ -0,0 +1,90 @@
1
+
2
+
3
+ #Treetop.load "lib/extract/math"
4
+ #Treetop.load "lib/extract/formula"
5
+
6
+ {:math => "MathMy", :formula => "Formula"}.each do |f,c|
7
+ f = File.expand_path(File.dirname(__FILE__)) + "/#{f}"
8
+ Treetop.load f
9
+ #Object.send(:remove_const,"#{c}Parser")
10
+ end
11
+
12
+ {:math => "MathMy", :formula => "Formula"}.each do |f,c|
13
+ #Treetop.load "lib/extract/#{f}"
14
+ end
15
+
16
+
17
+
18
+
19
+ class Object
20
+ attr_accessor :root_sheet
21
+
22
+ def find_sheet
23
+ if root_sheet
24
+ root_sheet
25
+ elsif parent
26
+ parent.find_sheet
27
+ else
28
+ raise "can't find sheet"
29
+ end
30
+ end
31
+ end
32
+
33
+ module Extract
34
+ class Parser
35
+ include FromHash
36
+ attr_accessor :str, :sheet
37
+
38
+ def result
39
+ p = FormulaParser.new
40
+ res = p.parse(str.gsub(" ",""))
41
+ if !res
42
+ strs = []
43
+ strs << p.failure_reason
44
+ strs << p.failure_line
45
+ strs << p.failure_column
46
+ strs << "no result for #{str}"
47
+ raise strs.join("\n")
48
+ end
49
+ res
50
+ end
51
+
52
+ def excel_value
53
+ res = result
54
+ raise "can't parse #{str}" unless res
55
+ res.root_sheet = sheet
56
+ res.meat.excel_value
57
+ end
58
+
59
+ def deps
60
+ res = result
61
+ res.root_sheet = sheet
62
+ #raise res.meat.inspect unless res.meat.respond_to?(:deps)
63
+ raise "can't parse #{str}" unless res
64
+ res.meat.deps.flatten.uniq.map do |c|
65
+ if c =~ /"/
66
+ nil
67
+ else
68
+ c.gsub("$","")
69
+ end
70
+ end.select { |x| x }.sort.uniq
71
+ end
72
+ end
73
+ end
74
+
75
+
76
+
77
+
78
+
79
+
80
+
81
+
82
+
83
+
84
+
85
+
86
+
87
+
88
+
89
+
90
+
@@ -0,0 +1,26 @@
1
+ module Extract
2
+ module Persist
3
+ class Sheet
4
+ include Mongoid::Document
5
+
6
+ field :cells, :type => Hash
7
+
8
+ field :input_cells, :type => Array
9
+ field :output_cells, :type => Array
10
+
11
+
12
+
13
+ def sheet_def
14
+ sheet = Extract::Sheet.new
15
+ cells.each do |k,v|
16
+ sheet.cells[k] = v
17
+ end
18
+
19
+ res = Extract::SheetDefinition.new(:sheet => sheet, :output_cells => output_cells)
20
+ res
21
+ end
22
+
23
+
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,85 @@
1
+ module Extract
2
+ class Sheet
3
+ include FromHash
4
+ fattr(:cells) { {} }
5
+ fattr(:cache) { {} }
6
+ fattr(:loaded_values) { {} }
7
+
8
+ def []=(c,val)
9
+ self.cells[c] = val
10
+ end
11
+ def [](c)
12
+ res = cells[c]
13
+ #puts "doing #{c} #{res}"
14
+ if res.to_s =~ /^=/
15
+ self.cache[c] ||= Extract::Parser.new(:str => res, :sheet => self).excel_value
16
+ else
17
+ res
18
+ end
19
+ end
20
+
21
+ def clear_cache!
22
+ self.cache = {}
23
+ end
24
+
25
+ def eval(str)
26
+ Extract::Parser.new(:str => str, :sheet => self).excel_value
27
+ end
28
+
29
+ def deps(c)
30
+ res = cells[c]
31
+ res = if res.to_s =~ /^=/
32
+ d = Extract::Parser.new(:str => res, :sheet => self).deps
33
+ d.map do |dep|
34
+ d2 = deps(dep)
35
+ if d2.empty?
36
+ dep
37
+ else
38
+ d2
39
+ end
40
+ end.flatten
41
+ else
42
+ []
43
+ end
44
+ res.flatten.uniq.map do |c|
45
+ if c =~ /"/
46
+ nil
47
+ else
48
+ c.gsub("$","")
49
+ end
50
+ end.select { |x| x }.sort.uniq
51
+ end
52
+
53
+ def each_value_comp
54
+ loaded_values.each do |k,v|
55
+ yield k,cells[k],self[k],v
56
+ end
57
+ end
58
+
59
+
60
+ class << self
61
+ def load(file)
62
+ w = Roo::Excelx.new(file)
63
+ w.default_sheet = w.sheets.first
64
+
65
+ sheet = Extract::Sheet.new
66
+
67
+ ("A".."Z").each do |col|
68
+ (1..100).each do |row|
69
+ val = if w.formula?(row,col)
70
+ "=" + w.formula(row,col).gsub(" ","")
71
+ else
72
+ w.cell(row,col)
73
+ end
74
+ loaded = w.cell(row,col)
75
+ sheet["#{col}#{row}"] = val if val.present?
76
+ sheet.loaded_values["#{col}#{row}"] = loaded if loaded.present?
77
+ end
78
+ end
79
+
80
+ sheet
81
+ end
82
+ end
83
+
84
+ end
85
+ end
@@ -0,0 +1,7 @@
1
+ module Extract
2
+ class SheetComp
3
+ include FromHash
4
+ attr_accessor :filename
5
+
6
+ end
7
+ end
@@ -0,0 +1,127 @@
1
+ module Extract
2
+ class SheetDefinition
3
+ include FromHash
4
+ attr_accessor :sheet
5
+
6
+ def prev_letter(letter)
7
+ r = ("A".."Z").to_a
8
+ raise "bad letter #{letter}" unless r.index(letter)
9
+ i = r.index(letter) - 1
10
+ r[i]
11
+ end
12
+ def left(c)
13
+ col = c[0..0]
14
+ row = c[1..-1]
15
+ col = prev_letter(col)
16
+ "#{col}#{row}"
17
+ end
18
+ fattr(:cell_names) do
19
+ res = {}
20
+ (input_cells + output_cells).each do |c|
21
+ n = left(c)
22
+ res[c] = sheet[n]
23
+ end
24
+ res
25
+ end
26
+ fattr(:output_cells) { [] }
27
+ def output_cells=(arr)
28
+ @output_cells = arr.map do |c|
29
+ if c =~ /:/
30
+ Extract::Tree::Range.cells_in_range(c)
31
+ else
32
+ c
33
+ end
34
+ end.flatten
35
+ end
36
+
37
+ fattr(:dep_map) do
38
+ res = {}
39
+ output_cells.each do |output_cell|
40
+ res[output_cell] = sheet.deps(output_cell).flatten.uniq.map do |c|
41
+ if c =~ /"/
42
+ nil
43
+ else
44
+ c.gsub("$","")
45
+ end
46
+ end.select { |x| x }.sort.uniq
47
+ end
48
+ res
49
+ end
50
+
51
+ fattr(:input_cells) do
52
+ output_cells.map do |c|
53
+ a = dep_map[c] || []
54
+ a = [c] if a.empty?
55
+ a
56
+ end.flatten.uniq.sort
57
+ end
58
+
59
+ def setup_persisted_sheet!(res=nil)
60
+ res.cells = {}
61
+ res.input_cells = []
62
+ res.output_cells = []
63
+
64
+ sheet.cells.each do |k,v|
65
+ res.cells[k] = v
66
+ end
67
+
68
+ input_cells.each do |c|
69
+ res.input_cells << c
70
+ end
71
+
72
+ output_cells.each do |c|
73
+ res.output_cells << c
74
+ end
75
+
76
+ res
77
+ end
78
+
79
+ def save!(res=nil)
80
+ res ||= Persist::Sheet.new
81
+ setup_persisted_sheet! res
82
+ res.save!
83
+ res
84
+ end
85
+
86
+ def [](c)
87
+ sheet[c]
88
+ end
89
+
90
+ def each_input
91
+ input_cells.each do |cell|
92
+ yield cell, cell_names[cell],sheet[cell]
93
+ end
94
+ end
95
+
96
+ def each_output
97
+ output_cells.sort.each do |cell|
98
+ yield cell, cell_names[cell],sheet[cell],dep_map[cell],sheet.cells[cell]
99
+ end
100
+ end
101
+
102
+ def each_other
103
+ res = []
104
+ bad = input_cells + output_cells
105
+ sheet.cells.each do |k,v|
106
+ if !bad.include?(k)
107
+ res << k
108
+ end
109
+ end
110
+
111
+ res.each do |c|
112
+ d = sheet.deps(c)
113
+ yield c,sheet.cells[c],d if sheet.cells[c].present? && d.size > 0
114
+ end
115
+
116
+ end
117
+
118
+ class << self
119
+ def load(file,output)
120
+ res = new
121
+ res.sheet = Sheet.load(file)
122
+ res.output_cells = output
123
+ res
124
+ end
125
+ end
126
+ end
127
+ end