extract 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.lre +1 -0
- data/.rspec +1 -0
- data/Gemfile +27 -0
- data/Gemfile.lock +108 -0
- data/Guardfile +27 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +49 -0
- data/VERSION +1 -0
- data/extract.gemspec +128 -0
- data/lib/extract.rb +39 -0
- data/lib/extract/excel_formulas.rb +44 -0
- data/lib/extract/formula.treetop +66 -0
- data/lib/extract/math.treetop +33 -0
- data/lib/extract/math_calc.rb +111 -0
- data/lib/extract/parser.rb +90 -0
- data/lib/extract/persist/sheet.rb +26 -0
- data/lib/extract/sheet.rb +85 -0
- data/lib/extract/sheet_comp.rb +7 -0
- data/lib/extract/sheet_definition.rb +127 -0
- data/lib/extract/tree/base.rb +7 -0
- data/lib/extract/tree/cell.rb +33 -0
- data/lib/extract/tree/cond_exp.rb +25 -0
- data/lib/extract/tree/formula.rb +24 -0
- data/lib/extract/tree/formula_args.rb +30 -0
- data/lib/extract/tree/math.rb +106 -0
- data/lib/extract/tree/num.rb +18 -0
- data/lib/extract/tree/operator.rb +9 -0
- data/lib/extract/tree/range.rb +58 -0
- data/lib/extract/tree/string.rb +12 -0
- data/samples/baseball.xlsx +0 -0
- data/samples/div.xlsx +0 -0
- data/spec/config/mongoid.yml +6 -0
- data/spec/deps_spec.rb +48 -0
- data/spec/extract_spec.rb +44 -0
- data/spec/math_spec.rb +52 -0
- data/spec/parser_spec.rb +145 -0
- data/spec/persist_spec.rb +34 -0
- data/spec/sheet_definition_spec.rb +46 -0
- data/spec/sheet_spec.rb +51 -0
- data/spec/spec_helper.rb +68 -0
- data/vol/excel_test.rb +55 -0
- data/vol/parse_test.rb +8 -0
- data/vol/scratch.rb +61 -0
- data/vol/web.rb +0 -0
- data/vol/yaml_test.rb +4 -0
- data/web/file.tmp +0 -0
- data/web/file.xlsx +0 -0
- data/web/main.rb +59 -0
- data/web/mongoid.yml +6 -0
- data/web/views/index.haml +39 -0
- data/web/views/upload.haml +13 -0
- metadata +311 -0
@@ -0,0 +1,66 @@
|
|
1
|
+
# Treetop grammar for the subset of Excel formulas this library parses.
# The arithmetic rules (math_exp_full, math_exp, op, ...) come from MathMy.
# Alternatives are PEG ordered choices: the first one that matches wins, so
# the order of the alternatives in every rule below is significant.
grammar Formula
  include MathMy

  # Entry point: a leading "=" followed by one expression, labelled `meat`.
  rule top
    "=" meat:(cond / range / math_exp_full / formula / cell / num)
  end

  # Operand of an arithmetic expression. MathMy has a rule of the same name
  # that only accepts `num`; this definition also accepts cells and
  # function calls.
  rule primary
    num / cell / formula
  end

  # Cell reference: optional leading "-", optional "$" markers, exactly one
  # upper-case column letter, then one or more row digits.
  rule cell
    "-"? "$"? c:[A-Z] "$"? r:[0-9]+ <Extract::Tree::Cell>
  end

  # Two cell references joined by ":".
  rule range
    a:cell ":" b:cell <Extract::Tree::Range>
  end

  # `range` is tried first because a range starts with a cell.
  rule cell_or_range
    range / cell
  end

  # Optionally negative integer or decimal literal.
  rule num
    "-"? [0-9]+ ("." [0-9]+)? <Extract::Tree::Num>
  end

  # Function call: NAME(arg, arg, ...).
  rule formula
    formula_name "(" formula_args ")" <Extract::Tree::Formula>
  end

  # The function names the grammar recognises (matched case-sensitively).
  rule formula_name
    "THING" / "DOUBLE" / "SUM" / "IF" / "MAX" / "VLOOKUP" / "SQRT" / "COMBIN"
  end

  # A single function argument.
  rule formula_arg
    cond / math_exp_full / formula / range / cell / num / string
  end

  # One argument followed by zero or more ","-separated arguments; the
  # additional arguments are reachable through `rest` / `fa`.
  rule formula_args
    formula_arg rest:("," fa:formula_arg)* <Extract::Tree::FormulaArgs>
  end

  # Comparison of two operands. The two-character operators are listed
  # before "=", ">" and "<" so that they are not cut short.
  rule cond_exp
    a:(num / cell / string / formula / math_exp_full) op:(">=" / "<=" / "=" / ">" / "<") b:(num / cell / string / formula / math_exp_full) <Extract::Tree::CondExp>
  end

  # Double-quoted string literal. The body may be empty so that the Excel
  # literal "" parses; embedded double quotes are not supported.
  rule string
    "\"" [^"]* "\"" <Extract::Tree::String>
  end

  # Boolean literal; evaluates to Ruby true/false.
  rule cond_const
    ("TRUE" / "FALSE") {
      def excel_value
        text_value == "TRUE"
      end
    }
  end

  # Any condition: a comparison or a boolean literal.
  rule cond
    cond_exp / cond_const
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# Arithmetic-expression grammar. The Formula grammar includes it and defines
# its own `primary` and `num` rules under the same names.
grammar MathMy
|
2
|
+
# An operand followed by AT LEAST one operator/expression pair (note the "+"),
# so a lone operand never matches this rule.
rule math_exp_full
|
3
|
+
(paren_exp / primary) rest:(op math_exp)+ <Extract::Tree::Math>
|
4
|
+
end
|
5
|
+
|
6
|
+
# A primary followed by zero or more operator/expression pairs.
rule naked_exp
|
7
|
+
primary rest:(op math_exp)* <Extract::Tree::Math>
|
8
|
+
end
|
9
|
+
|
10
|
+
# A parenthesised expression.
rule paren_exp
|
11
|
+
"(" math_exp ")" <Extract::Tree::ParenMath>
|
12
|
+
end
|
13
|
+
|
14
|
+
# General expression: parenthesised or bare, optionally continued by further
# operator/expression pairs. paren_exp is tried first (PEG ordered choice).
rule math_exp
|
15
|
+
(paren_exp / naked_exp) rest:(op math_exp)* <Extract::Tree::Math>
|
16
|
+
end
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
# Optionally negative integer or decimal literal.
rule num
|
22
|
+
"-"? [0-9]+ ("." [0-9]+)? <Extract::Tree::Num>
|
23
|
+
end
|
24
|
+
|
25
|
+
# One of the five supported binary operators.
rule op
|
26
|
+
("+" / "-" / "*" / "/" / "^") <Extract::Tree::Operator>
|
27
|
+
end
|
28
|
+
|
29
|
+
# Operand of an expression; within this grammar only numbers qualify.
rule primary
|
30
|
+
num
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
@@ -0,0 +1,111 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
module Extract
|
4
|
+
# One token of a flat arithmetic expression: either an operand or one of the
# operators + - / * ^. MathCalc builds these with
# MathWrapper.new(:str => value); the hash-based constructor is presumably
# supplied by FromHash.
class MathWrapper
  include FromHash
  attr_accessor :str

  # Operator symbols this token type recognises.
  OPERATORS = %w(+ - / * ^).freeze

  # Binding strength of each operator; a larger number binds tighter.
  PRECEDENCE = {"*" => 10, "/" => 10, "+" => 5, "-" => 5, "^" => 15}.freeze

  # Every operator, including "^", is treated as left-associative.
  def left_associative?
    true
  end

  # True when this token holds an operator rather than an operand.
  def operator?
    OPERATORS.include?(str)
  end

  # Precedence of this operator, or nil when the token is not an operator.
  def precedence
    PRECEDENCE[str]
  end

  # Applies this operator to a left and a right operand.
  #
  # l, r - operands: MathWrapper tokens or plain values produced by earlier
  #        calls. A token whose str is blank is rewritten in place to "0".
  #
  # Returns the value of the expression, or 0 when the expression text
  # contains any letter (this also covers "Infinity" and "NaN").
  # Raises RuntimeError when this token is not an operator.
  def apply(l, r)
    raise "bad apply, not an operator" unless operator?

    l.str = "0" if l.respond_to?(:str) && l.str.blank?
    r.str = "0" if r.respond_to?(:str) && r.str.blank?

    op = str == "^" ? "**" : str

    # The explicit to_s calls are deliberate: MathWrapper#to_s returns str,
    # which need not be a String, and interpolation then stringifies that.
    exp = "#{l.to_s} #{op} #{r.to_s}"
    return 0 if exp =~ /[a-z]/i

    # SECURITY NOTE(review): the operands come from spreadsheet content and
    # are passed to eval. The letter check above is the only guard, so treat
    # cell contents as untrusted and consider replacing eval with direct
    # numeric arithmetic.
    eval(exp)
  end

  # The raw token value.
  def to_s
    str
  end
end
|
43
|
+
|
44
|
+
# Evaluates a flat infix token list: the tokens are reordered into reverse
# Polish notation with the shunting-yard algorithm and then reduced with a
# stack. Instance methods can also be called on the class itself
# (MathCalc.parse_eval(...)); each such call runs on a fresh instance.
class MathCalc
  # Converts infix tokens to reverse Polish notation.
  #
  # input - tokens responding to operator?, precedence and left_associative?.
  #         Parentheses are not handled here.
  #
  # Returns a new Array holding the same tokens in RPN order.
  def shunting_yard(input)
    output = []
    # Operators wait here until an operator that binds less tightly arrives.
    pending = []

    input.each do |token|
      unless token.operator?
        output << token
        next
      end

      while (top = pending.last) && pops_before?(token, top)
        output << pending.pop
      end
      pending << token
    end

    output.concat(pending.reverse)
  end

  # String-based variant: wraps each string in a MathWrapper, reorders the
  # tokens and returns the reordered strings.
  def shunting_yard_old(input)
    wrapped = input.map { |x| MathWrapper.new(:str => x) }
    # This used to call an undefined shunting_yard_inner and always raised.
    shunting_yard(wrapped).map { |x| x.str }
  end

  # Reduces an RPN token list. Each operator pops the right and then the
  # left operand and pushes object.apply(left, right).
  #
  # Returns the first element left on the stack (nil for empty input).
  def rpn(input)
    stack = []
    input.each do |token|
      if token.operator?
        right = stack.pop
        left = stack.pop
        stack << token.apply(left, right)
      else
        stack << token
      end
    end
    stack.first
  end

  # Evaluates a list of parse nodes. A node contributes its excel_value when
  # it responds to that, otherwise its text_value.
  #
  # On failure the node texts and the RPN token list are printed before the
  # original exception is re-raised.
  def parse_eval(input)
    tokens = input.map do |node|
      MathWrapper.new(:str => (node.respond_to?(:excel_value) ? node.excel_value : node.text_value))
    end
    ordered = shunting_yard(tokens)

    begin
      rpn(ordered)
    rescue
      puts input.map { |x| x.text_value }.inspect
      puts ordered.inspect
      raise
    end
  end

  class << self
    # Forwards calls for public instance methods to a fresh instance; any
    # other name gets the regular NoMethodError.
    def method_missing(sym, *args, &b)
      return super unless method_defined?(sym)

      new.public_send(sym, *args, &b)
    end

    # Keeps respond_to? in agreement with method_missing.
    def respond_to_missing?(sym, include_private = false)
      method_defined?(sym) || super
    end
  end

  private

  # True when the stacked operator must be emitted before incoming is pushed.
  def pops_before?(incoming, stacked)
    if incoming.left_associative?
      incoming.precedence <= stacked.precedence
    else
      incoming.precedence < stacked.precedence
    end
  end
end
|
111
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
#Treetop.load "lib/extract/math"
|
4
|
+
#Treetop.load "lib/extract/formula"
|
5
|
+
|
6
|
+
# Load the Treetop grammars stored next to this file. "math" goes first
# because the Formula grammar includes MathMy.
%w(math formula).each do |grammar|
  Treetop.load File.join(File.expand_path(File.dirname(__FILE__)), grammar)
end
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
# Core extension: lets any object carry a reference to the sheet it belongs
# to and lets nested objects find that sheet by walking up their `parent`
# chain. Parser assigns root_sheet on the root of a parse result.
class Object
  attr_accessor :root_sheet

  # Returns this object's root_sheet, or the one found on the nearest
  # ancestor reachable through `parent`.
  #
  # Raises RuntimeError ("can't find sheet") when the chain ends without a
  # sheet. Objects that have no `parent` method at all are treated as the
  # end of the chain rather than failing with a NameError.
  def find_sheet
    return root_sheet if root_sheet

    # `true` also accepts a private `parent`, matching the receiver-less call.
    up = respond_to?(:parent, true) ? parent : nil
    raise "can't find sheet" unless up

    up.find_sheet
  end
end
|
32
|
+
|
33
|
+
module Extract
|
34
|
+
# Parses one formula string against a sheet. Sheet constructs it as
# Extract::Parser.new(:str => formula, :sheet => sheet).
class Parser
  include FromHash
  attr_accessor :str, :sheet

  # Parses str with FormulaParser and returns the root parse node.
  #
  # NOTE: every space is stripped from str before parsing, including spaces
  # inside quoted string literals.
  #
  # Raises RuntimeError carrying the parser's failure reason, line and
  # column when the text does not parse; this method never returns nil.
  def result
    parser = FormulaParser.new
    tree = parser.parse(str.gsub(" ", ""))
    return tree if tree

    details = [
      parser.failure_reason,
      parser.failure_line,
      parser.failure_column,
      "no result for #{str}"
    ]
    raise details.join("\n")
  end

  # Evaluates the formula. The sheet is attached to the root node first so
  # that nested nodes can reach it through find_sheet.
  def excel_value
    tree = result
    tree.root_sheet = sheet
    tree.meat.excel_value
  end

  # Names referenced directly by the formula: entries containing a double
  # quote are dropped, "$" markers are removed, and the result is sorted
  # and de-duplicated.
  def deps
    tree = result
    tree.root_sheet = sheet

    tree.meat.deps.flatten.uniq.
      reject { |name| name =~ /"/ }.
      map { |name| name.gsub("$", "") }.
      sort.uniq
  end
end
|
73
|
+
end
|
74
|
+
|
75
|
+
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Extract
|
2
|
+
module Persist
|
3
|
+
# Mongoid document holding a sheet's cells together with its input and
# output cell lists.
class Sheet
  include Mongoid::Document

  field :cells, :type => Hash

  field :input_cells, :type => Array
  field :output_cells, :type => Array

  # Rebuilds an in-memory Extract::SheetDefinition from the stored data.
  # Only cells and output_cells are handed over; input_cells is not passed.
  #
  # Fields that were never assigned are treated as empty (presumably nil on
  # a fresh document, verify against Mongoid defaults) so this does not
  # fail on a blank record.
  def sheet_def
    sheet = Extract::Sheet.new
    (cells || {}).each do |name, value|
      sheet.cells[name] = value
    end

    Extract::SheetDefinition.new(:sheet => sheet, :output_cells => output_cells || [])
  end
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
module Extract
|
2
|
+
# In-memory spreadsheet: maps cell names such as "B3" to raw content.
# Content whose text starts with "=" is a formula and is evaluated lazily.
class Sheet
  include FromHash
  # Raw cell content keyed by cell name.
  fattr(:cells) { {} }
  # Evaluated formula results keyed by cell name.
  fattr(:cache) { {} }
  # Values read from the workbook by Sheet.load, used by each_value_comp.
  fattr(:loaded_values) { {} }

  # Stores raw content for a cell. Cached formula results are dropped
  # because any of them may depend on the cell being changed.
  def []=(c, val)
    clear_cache!
    self.cells[c] = val
  end

  # Returns the value of a cell: the evaluated (and cached) result for a
  # formula, the raw content otherwise.
  def [](c)
    raw = cells[c]
    return raw unless formula?(raw)

    # key? rather than ||= so that false/nil results are cached as well.
    unless cache.key?(c)
      cache[c] = Extract::Parser.new(:str => raw, :sheet => self).excel_value
    end
    cache[c]
  end

  # Forgets every cached formula result.
  def clear_cache!
    self.cache = {}
  end

  # Evaluates an ad-hoc formula string against this sheet.
  # NOTE: this overrides Kernel#eval for instances of this class.
  def eval(str)
    Extract::Parser.new(:str => str, :sheet => self).excel_value
  end

  # Returns the sorted, unique leaf dependencies of cell c: each direct
  # dependency is replaced by its own dependencies when it has any.
  # Returns [] for a cell that does not hold a formula.
  #
  # NOTE: there is no cycle detection; circular references recurse
  # without bound.
  def deps(c)
    raw = cells[c]
    return [] unless formula?(raw)

    direct = Extract::Parser.new(:str => raw, :sheet => self).deps
    leaves = direct.map do |dep|
      nested = deps(dep)
      nested.empty? ? dep : nested
    end

    leaves.flatten.uniq.
      reject { |name| name =~ /"/ }.
      map { |name| name.gsub("$", "") }.
      sort.uniq
  end

  # Yields (name, raw content, current value, loaded value) for every cell
  # that has a loaded value.
  def each_value_comp
    loaded_values.each do |name, loaded|
      yield name, cells[name], self[name], loaded
    end
  end

  class << self
    # Builds a Sheet from the first worksheet of an .xlsx file.
    # Only columns A..Z and rows 1..100 are read.
    def load(file)
      w = Roo::Excelx.new(file)
      w.default_sheet = w.sheets.first

      sheet = Extract::Sheet.new

      ("A".."Z").each do |col|
        (1..100).each do |row|
          name = "#{col}#{row}"
          loaded = w.cell(row, col)
          val = w.formula?(row, col) ? "=" + w.formula(row, col).gsub(" ", "") : loaded

          sheet[name] = val if val.present?
          sheet.loaded_values[name] = loaded if loaded.present?
        end
      end

      sheet
    end
  end

  private

  # True when the content is a formula, i.e. its text starts with "=".
  # \A anchors at the start of the string; ^ would also match a "=" at the
  # start of a later line of multi-line text.
  def formula?(raw)
    !!(raw.to_s =~ /\A=/)
  end
end
|
85
|
+
end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
module Extract
|
2
|
+
# A sheet plus a chosen set of output cells. Input cells are derived from
# the outputs' dependencies, and each cell is named after the content of
# the cell immediately to its left.
class SheetDefinition
  include FromHash
  attr_accessor :sheet

  # Column letters supported by prev_letter.
  COLUMN_LETTERS = ("A".."Z").to_a.freeze

  # Returns the column letter before the given one, or nil for "A", which
  # has no column to its left (index arithmetic used to wrap round to "Z").
  # Raises RuntimeError for anything that is not a single letter A..Z.
  def prev_letter(letter)
    i = COLUMN_LETTERS.index(letter)
    raise "bad letter #{letter}" unless i

    i.zero? ? nil : COLUMN_LETTERS[i - 1]
  end

  # Name of the cell to the left of c ("B3" -> "A3"); nil for column A.
  def left(c)
    col = prev_letter(c[0..0])
    col && "#{col}#{c[1..-1]}"
  end

  # Hash of cell name => value of the cell to its left, for every input
  # and output cell. The value is nil when there is no left neighbour.
  fattr(:cell_names) do
    res = {}
    (input_cells + output_cells).each do |c|
      n = left(c)
      res[c] = n && sheet[n]
    end
    res
  end

  fattr(:output_cells) { [] }

  # Sets the output cells; entries containing ":" are expanded through
  # Extract::Tree::Range.cells_in_range.
  def output_cells=(arr)
    @output_cells = arr.map do |c|
      c =~ /:/ ? Extract::Tree::Range.cells_in_range(c) : c
    end.flatten
  end

  # Hash of output cell => sorted unique dependency names (quoted entries
  # dropped, "$" markers removed).
  fattr(:dep_map) do
    res = {}
    output_cells.each do |output_cell|
      res[output_cell] = sheet.deps(output_cell).flatten.uniq.
        reject { |c| c =~ /"/ }.
        map { |c| c.gsub("$", "") }.
        sort.uniq
    end
    res
  end

  # Sorted unique dependencies of all outputs; an output without
  # dependencies counts as its own input.
  fattr(:input_cells) do
    output_cells.map do |c|
      a = dep_map[c] || []
      a.empty? ? [c] : a
    end.flatten.uniq.sort
  end

  # Copies cells, input cells and output cells onto a persistable record.
  #
  # res - target record; a new Persist::Sheet is created when omitted
  #       (the nil default used to be dereferenced directly).
  #
  # Returns the populated record; it is not saved here.
  def setup_persisted_sheet!(res=nil)
    res ||= Persist::Sheet.new

    res.cells = sheet.cells.dup
    res.input_cells = input_cells.dup
    res.output_cells = output_cells.dup

    res
  end

  # Populates and saves a record (a new Persist::Sheet when omitted) and
  # returns it.
  def save!(res=nil)
    res ||= Persist::Sheet.new
    setup_persisted_sheet! res
    res.save!
    res
  end

  # Value of a cell, delegated to the sheet.
  def [](c)
    sheet[c]
  end

  # Yields (cell, name, value) for every input cell.
  def each_input
    input_cells.each do |cell|
      yield cell, cell_names[cell], sheet[cell]
    end
  end

  # Yields (cell, name, value, dependencies, raw content) for every output
  # cell, in sorted order.
  def each_output
    output_cells.sort.each do |cell|
      yield cell, cell_names[cell], sheet[cell], dep_map[cell], sheet.cells[cell]
    end
  end

  # Yields (cell, raw content, dependencies) for every cell that is neither
  # an input nor an output, has content and has at least one dependency.
  def each_other
    named = input_cells + output_cells

    (sheet.cells.keys - named).each do |c|
      d = sheet.deps(c)
      yield c, sheet.cells[c], d if sheet.cells[c].present? && d.size > 0
    end
  end

  class << self
    # Loads the workbook at file and marks the given cells as outputs.
    def load(file, output)
      res = new
      res.sheet = Sheet.load(file)
      res.output_cells = output
      res
    end
  end
end
|
127
|
+
end
|