extract 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.lre +1 -0
- data/.rspec +1 -0
- data/Gemfile +27 -0
- data/Gemfile.lock +108 -0
- data/Guardfile +27 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +49 -0
- data/VERSION +1 -0
- data/extract.gemspec +128 -0
- data/lib/extract.rb +39 -0
- data/lib/extract/excel_formulas.rb +44 -0
- data/lib/extract/formula.treetop +66 -0
- data/lib/extract/math.treetop +33 -0
- data/lib/extract/math_calc.rb +111 -0
- data/lib/extract/parser.rb +90 -0
- data/lib/extract/persist/sheet.rb +26 -0
- data/lib/extract/sheet.rb +85 -0
- data/lib/extract/sheet_comp.rb +7 -0
- data/lib/extract/sheet_definition.rb +127 -0
- data/lib/extract/tree/base.rb +7 -0
- data/lib/extract/tree/cell.rb +33 -0
- data/lib/extract/tree/cond_exp.rb +25 -0
- data/lib/extract/tree/formula.rb +24 -0
- data/lib/extract/tree/formula_args.rb +30 -0
- data/lib/extract/tree/math.rb +106 -0
- data/lib/extract/tree/num.rb +18 -0
- data/lib/extract/tree/operator.rb +9 -0
- data/lib/extract/tree/range.rb +58 -0
- data/lib/extract/tree/string.rb +12 -0
- data/samples/baseball.xlsx +0 -0
- data/samples/div.xlsx +0 -0
- data/spec/config/mongoid.yml +6 -0
- data/spec/deps_spec.rb +48 -0
- data/spec/extract_spec.rb +44 -0
- data/spec/math_spec.rb +52 -0
- data/spec/parser_spec.rb +145 -0
- data/spec/persist_spec.rb +34 -0
- data/spec/sheet_definition_spec.rb +46 -0
- data/spec/sheet_spec.rb +51 -0
- data/spec/spec_helper.rb +68 -0
- data/vol/excel_test.rb +55 -0
- data/vol/parse_test.rb +8 -0
- data/vol/scratch.rb +61 -0
- data/vol/web.rb +0 -0
- data/vol/yaml_test.rb +4 -0
- data/web/file.tmp +0 -0
- data/web/file.xlsx +0 -0
- data/web/main.rb +59 -0
- data/web/mongoid.yml +6 -0
- data/web/views/index.haml +39 -0
- data/web/views/upload.haml +13 -0
- metadata +311 -0
@@ -0,0 +1,66 @@
|
|
1
|
+
grammar Formula
  # Treetop (PEG) grammar for the spreadsheet formulas this library parses.
  # Alternatives in a `/` choice are tried left to right and the first match
  # wins, so the order of every choice below is significant.
  #
  # MathMy supplies the arithmetic rules (math_exp_full, math_exp, op, ...).
  include MathMy

  # Entry rule (kept first in the grammar): "=" followed by one expression.
  # The expression is labelled `meat`; Extract::Parser reads it via `res.meat`.
  rule top
    "=" meat:(cond / range / math_exp_full / formula / cell / num)
  end

  # Redefines the `primary` rule that MathMy declares as just `num`, so the
  # arithmetic rules can also take cells and function calls as operands.
  rule primary
    num / cell / formula
  end

  # A single cell reference: optional leading minus, optional "$" anchors,
  # exactly one upper-case column letter and one or more row digits.
  rule cell
    "-"? "$"? c:[A-Z] "$"? r:[0-9]+ <Extract::Tree::Cell>
  end

  # Two cell references joined by ":" (e.g. A1:B3).
  rule range
    a:cell ":" b:cell <Extract::Tree::Range>
  end

  # Convenience rule; not referenced by any other rule in this grammar.
  rule cell_or_range
    range / cell
  end

  # Optionally negative integer or decimal literal (same text as MathMy's num).
  rule num
    "-"? [0-9]+ ("." [0-9]+)? <Extract::Tree::Num>
  end

  # Function call: NAME "(" args ")".
  rule formula
    formula_name "(" formula_args ")" <Extract::Tree::Formula>
  end

  # Closed list of recognised function names.
  rule formula_name
    "THING" / "DOUBLE" / "SUM" / "IF" / "MAX" / "VLOOKUP" / "SQRT" / "COMBIN"
  end

  # One argument of a function call.
  rule formula_arg
    cond / math_exp_full / formula / range / cell / num / string
  end

  # One or more comma-separated arguments; the tail is labelled `rest` and
  # each extra argument inside it `fa`.
  rule formula_args
    formula_arg rest:("," fa:formula_arg)* <Extract::Tree::FormulaArgs>
  end

  # Binary comparison. The two-character operators are listed before their
  # one-character prefixes so ">=" is not consumed as ">".
  rule cond_exp
    a:(num / cell / string / formula / math_exp_full)
    op:(">=" / "<=" / "=" / ">" / "<")
    b:(num / cell / string / formula / math_exp_full)
    <Extract::Tree::CondExp>
  end

  # Double-quoted string literal. Uses `*` rather than `+` so that the empty
  # literal "" (as in IF(A1="",...)) is accepted as well.
  rule string
    "\"" [^"]* "\"" <Extract::Tree::String>
  end

  # Boolean literal; evaluates to Ruby true/false.
  rule cond_const
    ("TRUE" / "FALSE") {
      def excel_value
        (text_value == "TRUE") ? true : false
      end
    }
  end

  # Any condition: a comparison or a boolean literal.
  rule cond
    cond_exp / cond_const
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
grammar MathMy
  # Arithmetic sub-grammar (Treetop/PEG). Every expression rule builds an
  # Extract::Tree::Math (or ParenMath) node; operators become
  # Extract::Tree::Operator nodes.

  # Root rule (kept first): an operand followed by AT LEAST one
  # "operator expression" pair, i.e. something that really is arithmetic.
  rule math_exp_full
    (paren_exp / primary) rest:(op math_exp)+ <Extract::Tree::Math>
  end

  # General expression: a parenthesised or bare expression followed by zero or
  # more "operator expression" pairs.
  rule math_exp
    (paren_exp / naked_exp) rest:(op math_exp)* <Extract::Tree::Math>
  end

  # Expression that starts with a plain operand (no parentheses).
  rule naked_exp
    primary rest:(op math_exp)* <Extract::Tree::Math>
  end

  # Expression wrapped in parentheses.
  rule paren_exp
    "(" math_exp ")" <Extract::Tree::ParenMath>
  end

  # Operand. Within this grammar the only operand is a number.
  rule primary
    num
  end

  # Optionally negative integer or decimal literal.
  rule num
    "-"? [0-9]+ ("." [0-9]+)? <Extract::Tree::Num>
  end

  # The five supported binary operators.
  rule op
    ("+" / "-" / "*" / "/" / "^") <Extract::Tree::Operator>
  end
end
|
33
|
+
|
@@ -0,0 +1,111 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
module Extract
|
4
|
+
# Wraps one token of an arithmetic expression (an operand or an operator) so
# that MathCalc can treat both uniformly.
#
# `str` holds the token's value. FromHash is defined outside this file;
# presumably it supplies the hash-based constructor used as
# `MathWrapper.new(:str => ...)` -- confirm against its definition.
class MathWrapper
  include FromHash
  attr_accessor :str

  # Tokens that count as binary operators.
  OPERATORS = %w(+ - / * ^).freeze

  # Binding strength per operator; a higher number binds tighter.
  PRECEDENCE = {"*" => 10, "/" => 10, "+" => 5, "-" => 5, "^" => 15}.freeze

  # Every operator is treated as left-associative.
  def left_associative?
    true
  end

  # True when this token is one of OPERATORS.
  def operator?
    OPERATORS.include?(str)
  end

  # Precedence of this operator, or nil when the token is not an operator.
  def precedence
    PRECEDENCE[str]
  end

  # Applies this operator to two operands and returns the result.
  #
  # l, r - operands: either MathWrapper-like objects (anything responding to
  #        `str`) or plain values produced by an earlier `apply`.
  #
  # Blank operand strings are replaced by "0" (this mutates the operand).
  # Returns 0 without evaluating when the expression text contains any letter,
  # which also covers "Infinity" and "NaN".
  # Raises RuntimeError when this token is not an operator.
  def apply(l, r)
    raise "bad apply, not an operator" unless operator?

    l.str = "0" if l.respond_to?(:str) && l.str.blank?
    r.str = "0" if r.respond_to?(:str) && r.str.blank?

    # Ruby spells exponentiation "**".
    op = (str == "^") ? "**" : str
    exp = "#{l.to_s} #{op} #{r.to_s}"

    # Anything containing a letter is not plain arithmetic; short-circuit to 0.
    return 0 if exp =~ /[a-z]/i

    # SECURITY NOTE(review): the expression is built from cell contents and
    # handed to Kernel#eval. The letter check above is the only guard; replace
    # with explicit numeric arithmetic if cell data can be untrusted.
    eval(exp)
  end

  # The raw token value.
  def to_s
    str
  end
end
|
43
|
+
|
44
|
+
# Evaluates a flat infix token list: shunting-yard conversion to reverse
# Polish notation followed by stack evaluation.
#
# Tokens must respond to `operator?`; operator tokens must also respond to
# `precedence`, `left_associative?` and `apply(left, right)`.
#
# Public instance methods may also be called on the class itself
# (e.g. `MathCalc.parse_eval(nodes)`); each such call uses a fresh instance.
class MathCalc
  # Reorders infix tokens into reverse Polish notation.
  #
  # input - Enumerable of tokens in infix order (no parentheses handling).
  #
  # Returns a new Array of the same tokens in RPN order.
  def shunting_yard(input)
    output = []
    # operators wait here until an operator of lower precedence arrives
    operator_stack = []

    input.each do |token|
      if token.operator?
        # Pop every stacked operator that must be applied before `token`:
        # same-or-higher precedence for left-associative tokens, strictly
        # higher otherwise.
        while (top = operator_stack.last) &&
              (token.left_associative? ? token.precedence <= top.precedence : token.precedence < top.precedence)
          output << operator_stack.pop
        end
        operator_stack << token
      else
        output << token
      end
    end

    output << operator_stack.pop until operator_stack.empty?
    output
  end

  # String-based variant: wraps each raw token in a MathWrapper, converts to
  # RPN and returns the raw tokens again.
  #
  # (Previously called the non-existent `shunting_yard_inner` and therefore
  # always raised NoMethodError.)
  def shunting_yard_old(input)
    wrapped = input.map { |x| MathWrapper.new(:str => x) }
    shunting_yard(wrapped).map { |x| x.str }
  end

  # Evaluates a token list that is already in RPN order.
  #
  # Returns the value left at the bottom of the stack; for a single operand
  # that is the token itself, and for empty input nil.
  def rpn(input)
    results = []
    input.each do |token|
      if token.operator?
        # the right operand was pushed last
        r = results.pop
        l = results.pop
        results << token.apply(l, r)
      else
        results << token
      end
    end
    results.first
  end

  # Evaluates a list of parse-tree nodes given in infix order.
  #
  # input - nodes responding to `text_value`; nodes that respond to
  #         `excel_value` contribute that value instead.
  #
  # On failure the original token texts and the RPN list are printed to
  # stdout for diagnosis and the error is re-raised unchanged.
  def parse_eval(input)
    tokens = input.map do |node|
      MathWrapper.new(:str => (node.respond_to?(:excel_value) ? node.excel_value : node.text_value))
    end
    ordered = shunting_yard(tokens)

    begin
      rpn(ordered)
    rescue
      puts input.map { |x| x.text_value }.inspect
      puts ordered.inspect
      raise
    end
  end

  class << self
    # Forwards calls for public instance methods to a throw-away instance.
    # Anything else falls through to the normal NoMethodError.
    def method_missing(sym, *args, &b)
      return super unless public_method_defined?(sym)
      new.send(sym, *args, &b)
    end

    # Keeps `respond_to?` consistent with `method_missing`.
    def respond_to_missing?(sym, include_private = false)
      public_method_defined?(sym) || super
    end
  end
end
|
111
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
#Treetop.load "lib/extract/math"
|
4
|
+
#Treetop.load "lib/extract/formula"
|
5
|
+
|
6
|
+
# Load the Treetop grammars that live next to this file.
# Order matters: the Formula grammar includes MathMy, so "math" is loaded
# first. An Array makes that order explicit instead of relying on Hash
# iteration order.
%w(math formula).each do |grammar|
  Treetop.load File.join(File.expand_path(File.dirname(__FILE__)), grammar)
end
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
# Core extension: lets any object (in practice, parse-tree nodes) locate the
# sheet it belongs to by walking up its `parent` chain.
class Object
  # The sheet attached directly to this object, if any (set on the tree root
  # by Extract::Parser).
  attr_accessor :root_sheet

  # Returns the nearest `root_sheet` found on this object or on an ancestor
  # reached through `parent`.
  #
  # Raises RuntimeError ("can't find sheet") when neither this object nor any
  # ancestor carries a sheet. Objects that have no `parent` method at all are
  # treated as having no parent instead of failing with NameError.
  def find_sheet
    return root_sheet if root_sheet

    ancestor = respond_to?(:parent, true) ? parent : nil
    raise "can't find sheet" unless ancestor

    ancestor.find_sheet
  end
end
|
32
|
+
|
33
|
+
module Extract
|
34
|
+
# Parses one formula string with FormulaParser and exposes its value and the
# cells it references.
#
# str   - formula text, including the leading "=".
# sheet - sheet used to resolve cell references during evaluation.
#
# FromHash is defined outside this file; presumably it supplies the hash-based
# constructor used as `Parser.new(:str => ..., :sheet => ...)` -- confirm.
class Parser
  include FromHash
  attr_accessor :str, :sheet

  # Parses `str` and returns the root syntax node. Each call parses again.
  #
  # All spaces are removed before parsing.
  # NOTE(review): this also removes spaces inside quoted string literals.
  #
  # Raises RuntimeError whose message lists the parser's failure reason, line
  # and column followed by "no result for <str>" when parsing fails, so the
  # return value is never nil.
  def result
    parser = FormulaParser.new
    tree = parser.parse(str.gsub(" ", ""))
    return tree if tree

    details = [parser.failure_reason, parser.failure_line, parser.failure_column, "no result for #{str}"]
    raise details.join("\n")
  end

  # Evaluates the formula against `sheet` and returns its value.
  def excel_value
    rooted_tree.meat.excel_value
  end

  # Names referenced by the formula: entries containing a double quote are
  # dropped, "$" anchors are stripped, and the result is sorted and unique.
  def deps
    rooted_tree.meat.deps.flatten.
      reject { |name| name =~ /"/ }.
      map { |name| name.gsub("$", "") }.
      sort.uniq
  end

  private

  # Parses the formula and attaches `sheet` to the tree root so that nodes
  # can reach it through `find_sheet`.
  def rooted_tree
    tree = result
    tree.root_sheet = sheet
    tree
  end
end
|
73
|
+
end
|
74
|
+
|
75
|
+
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Extract
|
2
|
+
module Persist
|
3
|
+
# Mongoid document that stores a sheet: its raw cells plus the names of its
# input and output cells.
class Sheet
  include Mongoid::Document

  # cell name => raw cell content
  field :cells, :type => Hash

  field :input_cells, :type => Array
  field :output_cells, :type => Array

  # Rebuilds an in-memory Extract::SheetDefinition from the stored data.
  #
  # Unset (nil) `cells` / `output_cells` are treated as empty so that a
  # document that has not been populated yet does not raise.
  #
  # Returns the new Extract::SheetDefinition.
  def sheet_def
    sheet = Extract::Sheet.new
    (cells || {}).each do |name, value|
      sheet.cells[name] = value
    end

    Extract::SheetDefinition.new(:sheet => sheet, :output_cells => output_cells || [])
  end
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
module Extract
|
2
|
+
# In-memory spreadsheet: a map of cell name => raw content, where content
# starting with "=" is a formula that is parsed and evaluated on demand.
#
# cells         - cell name => raw content.
# cache         - cell name => evaluated formula result.
# loaded_values - cell name => value reported by the workbook at load time.
#
# FromHash and `fattr` are defined outside this file; `fattr` is used here as
# an attribute with a lazily built default -- confirm against its definition.
class Sheet
  include FromHash
  fattr(:cells) { {} }
  fattr(:cache) { {} }
  fattr(:loaded_values) { {} }

  # Stores raw content for cell `c` and drops all cached formula results,
  # since any of them may depend on the changed cell.
  # NOTE(review): writing to `cells` directly bypasses this invalidation; call
  # `clear_cache!` in that case.
  def []=(c, val)
    clear_cache!
    self.cells[c] = val
  end

  # Returns the value of cell `c`: the evaluated (and cached) result for a
  # formula, the raw content otherwise.
  def [](c)
    raw = cells[c]
    return raw unless formula_text?(raw)

    self.cache[c] ||= Extract::Parser.new(:str => raw, :sheet => self).excel_value
  end

  # Forgets every cached formula result.
  def clear_cache!
    self.cache = {}
  end

  # Evaluates an ad-hoc formula string against this sheet.
  # Note: this shadows Kernel#eval for instances of this class.
  def eval(str)
    Extract::Parser.new(:str => str, :sheet => self).excel_value
  end

  # Returns the sorted, unique names that cell `c` ultimately depends on.
  #
  # A referenced cell that itself has dependencies is replaced by those
  # dependencies; a referenced cell without any is reported itself. Cells
  # that do not hold a formula have no dependencies ([]).
  # NOTE(review): there is no cycle detection, so circular references recurse
  # without bound.
  def deps(c)
    raw = cells[c]
    return [] unless formula_text?(raw)

    direct = Extract::Parser.new(:str => raw, :sheet => self).deps
    expanded = direct.map do |dep|
      nested = deps(dep)
      nested.empty? ? dep : nested
    end

    expanded.flatten.
      reject { |name| name =~ /"/ }.
      map { |name| name.gsub("$", "") }.
      sort.uniq
  end

  # Yields, for every cell that had a value at load time:
  # cell name, raw content, computed value, value loaded from the workbook.
  def each_value_comp
    loaded_values.each do |k, v|
      yield k, cells[k], self[k], v
    end
  end

  class << self
    # Builds a Sheet from the first worksheet of an .xlsx file.
    #
    # file - path handed to Roo::Excelx.new.
    # rows - row numbers to scan (default 1..100).
    # cols - column letters to scan (default "A".."Z").
    #
    # Formula cells are stored as "=" + formula text with spaces removed;
    # other cells store the workbook's value. Blank values are skipped.
    def load(file, rows = 1..100, cols = "A".."Z")
      w = Roo::Excelx.new(file)
      w.default_sheet = w.sheets.first

      sheet = Extract::Sheet.new

      cols.each do |col|
        rows.each do |row|
          name = "#{col}#{row}"
          loaded = w.cell(row, col)
          val = w.formula?(row, col) ? "=" + w.formula(row, col).gsub(" ", "") : loaded

          sheet[name] = val if val.present?
          sheet.loaded_values[name] = loaded if loaded.present?
        end
      end

      sheet
    end
  end

  private

  # True when `value` is formula text, i.e. its string form starts with "=".
  # Only the very first character counts; a later line of a multi-line text
  # value starting with "=" does not make it a formula.
  def formula_text?(value)
    value.to_s.start_with?("=")
  end
end
|
85
|
+
end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
module Extract
|
2
|
+
# Describes a sheet in terms of its output cells and the input cells they
# depend on, and can persist that description.
#
# FromHash and `fattr` are defined outside this file; `fattr` is used here as
# an attribute with a lazily computed default -- confirm against its
# definition.
class SheetDefinition
  include FromHash
  attr_accessor :sheet

  # Column letters known to this class.
  COLUMN_LETTERS = ("A".."Z").to_a.freeze

  # Returns the column letter before `letter`, or nil for "A", which has no
  # predecessor (previously this wrapped around to "Z").
  #
  # Raises RuntimeError for anything that is not a single letter A-Z.
  def prev_letter(letter)
    index = COLUMN_LETTERS.index(letter)
    raise "bad letter #{letter}" unless index

    index.zero? ? nil : COLUMN_LETTERS[index - 1]
  end

  # Returns the name of the cell immediately left of `c` (e.g. "B3" -> "A3"),
  # or nil when `c` is in column A. Assumes a single-letter column.
  def left(c)
    col = prev_letter(c[0..0])
    col && "#{col}#{c[1..-1]}"
  end

  # cell name => value of the cell to its left, for every input and output
  # cell; nil when there is no cell to the left.
  # presumably the left neighbour holds a human-readable label -- confirm.
  fattr(:cell_names) do
    res = {}
    (input_cells + output_cells).each do |c|
      neighbour = left(c)
      res[c] = neighbour ? sheet[neighbour] : nil
    end
    res
  end

  fattr(:output_cells) { [] }

  # Sets the output cells. Entries containing ":" are expanded through
  # Extract::Tree::Range.cells_in_range. nil is treated as an empty list and
  # a single name as a one-element list.
  def output_cells=(arr)
    @output_cells = Array(arr).map do |c|
      c =~ /:/ ? Extract::Tree::Range.cells_in_range(c) : c
    end.flatten
  end

  # output cell => sorted, unique names it depends on (entries containing a
  # double quote dropped, "$" anchors stripped).
  fattr(:dep_map) do
    res = {}
    output_cells.each do |output_cell|
      res[output_cell] = sheet.deps(output_cell).flatten.
        reject { |c| c =~ /"/ }.
        map { |c| c.gsub("$", "") }.
        sort.uniq
    end
    res
  end

  # Sorted, unique dependencies of all output cells. An output cell without
  # dependencies counts as its own input.
  fattr(:input_cells) do
    output_cells.map do |c|
      found = dep_map[c] || []
      found.empty? ? [c] : found
    end.flatten.uniq.sort
  end

  # Copies cells, input cells and output cells onto `res`, replacing whatever
  # it held before.
  #
  # res - object with `cells=`, `input_cells=` and `output_cells=`; a new
  #       Persist::Sheet is created when omitted (previously nil crashed).
  #
  # Returns `res`.
  def setup_persisted_sheet!(res=nil)
    res ||= Persist::Sheet.new

    res.cells = {}
    res.input_cells = []
    res.output_cells = []

    sheet.cells.each { |k, v| res.cells[k] = v }
    input_cells.each { |c| res.input_cells << c }
    output_cells.each { |c| res.output_cells << c }

    res
  end

  # Persists this definition into `res` (a new Persist::Sheet when omitted)
  # and returns the saved object.
  def save!(res=nil)
    res = setup_persisted_sheet!(res)
    res.save!
    res
  end

  # Value of cell `c` in the underlying sheet.
  def [](c)
    sheet[c]
  end

  # Yields cell name, name-cell value and current value for each input cell.
  def each_input
    input_cells.each do |cell|
      yield cell, cell_names[cell], sheet[cell]
    end
  end

  # Yields, for each output cell in sorted order: cell name, name-cell value,
  # current value, dependencies and raw content.
  def each_output
    output_cells.sort.each do |cell|
      yield cell, cell_names[cell], sheet[cell], dep_map[cell], sheet.cells[cell]
    end
  end

  # Yields cell name, raw content and dependencies for every cell that is
  # neither an input nor an output, has non-blank content and has at least
  # one dependency.
  def each_other
    claimed = input_cells + output_cells
    others = sheet.cells.keys.reject { |k| claimed.include?(k) }

    others.each do |c|
      d = sheet.deps(c)
      yield c, sheet.cells[c], d if sheet.cells[c].present? && d.size > 0
    end
  end

  class << self
    # Loads the workbook at `file` and returns a definition whose output
    # cells are `output`.
    def load(file, output)
      res = new
      res.sheet = Sheet.load(file)
      res.output_cells = output
      res
    end
  end
end
|
127
|
+
end
|