extract 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. data/.document +5 -0
  2. data/.lre +1 -0
  3. data/.rspec +1 -0
  4. data/Gemfile +27 -0
  5. data/Gemfile.lock +108 -0
  6. data/Guardfile +27 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.rdoc +19 -0
  9. data/Rakefile +49 -0
  10. data/VERSION +1 -0
  11. data/extract.gemspec +128 -0
  12. data/lib/extract.rb +39 -0
  13. data/lib/extract/excel_formulas.rb +44 -0
  14. data/lib/extract/formula.treetop +66 -0
  15. data/lib/extract/math.treetop +33 -0
  16. data/lib/extract/math_calc.rb +111 -0
  17. data/lib/extract/parser.rb +90 -0
  18. data/lib/extract/persist/sheet.rb +26 -0
  19. data/lib/extract/sheet.rb +85 -0
  20. data/lib/extract/sheet_comp.rb +7 -0
  21. data/lib/extract/sheet_definition.rb +127 -0
  22. data/lib/extract/tree/base.rb +7 -0
  23. data/lib/extract/tree/cell.rb +33 -0
  24. data/lib/extract/tree/cond_exp.rb +25 -0
  25. data/lib/extract/tree/formula.rb +24 -0
  26. data/lib/extract/tree/formula_args.rb +30 -0
  27. data/lib/extract/tree/math.rb +106 -0
  28. data/lib/extract/tree/num.rb +18 -0
  29. data/lib/extract/tree/operator.rb +9 -0
  30. data/lib/extract/tree/range.rb +58 -0
  31. data/lib/extract/tree/string.rb +12 -0
  32. data/samples/baseball.xlsx +0 -0
  33. data/samples/div.xlsx +0 -0
  34. data/spec/config/mongoid.yml +6 -0
  35. data/spec/deps_spec.rb +48 -0
  36. data/spec/extract_spec.rb +44 -0
  37. data/spec/math_spec.rb +52 -0
  38. data/spec/parser_spec.rb +145 -0
  39. data/spec/persist_spec.rb +34 -0
  40. data/spec/sheet_definition_spec.rb +46 -0
  41. data/spec/sheet_spec.rb +51 -0
  42. data/spec/spec_helper.rb +68 -0
  43. data/vol/excel_test.rb +55 -0
  44. data/vol/parse_test.rb +8 -0
  45. data/vol/scratch.rb +61 -0
  46. data/vol/web.rb +0 -0
  47. data/vol/yaml_test.rb +4 -0
  48. data/web/file.tmp +0 -0
  49. data/web/file.xlsx +0 -0
  50. data/web/main.rb +59 -0
  51. data/web/mongoid.yml +6 -0
  52. data/web/views/index.haml +39 -0
  53. data/web/views/upload.haml +13 -0
  54. metadata +311 -0
@@ -0,0 +1,7 @@
1
module Extract
  module Tree
    # Marker mixin: it currently declares no methods or constants of its own.
    module Base; end
  end
end
@@ -0,0 +1,33 @@
1
module Extract
  module Tree
    # Behaviour for a node whose text is a single cell reference such as
    # "A2", "$A$2" or "-A2".
    #
    # The including object must provide #text_value, #find_sheet, and the
    # sub-nodes #r (row) and #c (column).
    # NOTE(review): presumably mixed into parser syntax nodes - confirm
    # against the grammar files.
    module Cell
      # The reference with every "-" and "$" character removed, i.e. the key
      # used to look the cell up in the sheet.
      def proper_cell
        text_value.gsub(/[-$]/, "")
      end

      # True when the reference text begins with a minus sign.
      def leading_neg?
        text_value.start_with?("-")
      end

      # Value of the referenced cell, negated when the reference carries a
      # leading minus. Values that are not `present?` are returned untouched.
      def excel_value
        value = find_sheet[proper_cell]
        negate = value.present? && leading_neg?
        negate ? value * -1 : value
      end

      # Row number taken from the row sub-node, as an Integer.
      def row
        r.text_value.to_i
      end

      # Column text taken from the column sub-node.
      def col
        c.text_value
      end

      # A cell depends only on itself (sign and "$" markers stripped).
      def deps
        [proper_cell]
      end

      # Token type tag for this node.
      def tt
        :cell
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
module Extract
  module Tree
    # Behaviour for a comparison node of the form `a <op> b`.
    #
    # The including object must provide the operand nodes #a and #b and the
    # operator node #op (read through #text_value).
    module CondExp
      # Evaluates the comparison between the two operand values.
      #
      # Supported operators: "=", ">", "<", ">=", "<=".
      #
      # @return the result of the Ruby comparison (normally true/false)
      # @raise [RuntimeError] ("bad") when the operator text is not supported;
      #   the operands are not evaluated in that case
      def excel_value
        comparator =
          case op.text_value
          when "="  then :==
          when ">"  then :>
          when "<"  then :<
          when ">=" then :>=
          when "<=" then :<=
          else raise "bad"
          end

        a.excel_value.public_send(comparator, b.excel_value)
      end

      # Cells this comparison depends on.
      #
      # Each operand is asked for its own #deps when it offers them, so a
      # signed reference ("-A1") or a compound operand ("A1+A2") reports the
      # underlying cells rather than its raw text. Operands without #deps
      # fall back to their text, which is what was reported before.
      #
      # @return [Array] unique dependency names, left operand first
      def deps
        [a, b].map do |operand|
          operand.respond_to?(:deps) ? operand.deps : [operand.text_value]
        end.flatten.uniq
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module Extract
  module Tree
    # Behaviour for a function-call node such as `SUM(A1,A2)`.
    #
    # The including object must provide #formula_name (read through
    # #text_value) and #formula_args (which answers #excel_values and #deps).
    module Formula
      # Evaluates the call by dispatching the lower-cased function name to
      # ExcelFormulas with the evaluated arguments.
      #
      # The name comes from spreadsheet content, so it is dispatched with
      # `public_send`: unlike `send`, that refuses private methods, which
      # keeps names such as "SYSTEM" or "EXEC" from reaching Kernel.
      # NOTE(review): public methods that ExcelFormulas inherits are still
      # reachable; an explicit whitelist of supported formulas would be
      # stricter.
      #
      # @raise [NoMethodError] when ExcelFormulas has no public method of
      #   that name
      def excel_value
        args = formula_args.excel_values
        ExcelFormulas.public_send(formula_name.text_value.downcase, *args)
      end

      # Unique cells referenced by the arguments.
      #
      # A dependency that still carries a leading minus sign is reported
      # without it. (Previously a leftover debugging `raise 'foo'` sat in
      # front of that stripping step and made it unreachable.)
      #
      # @return [Array] unique dependency names in first-seen order
      def deps
        formula_args.deps.flatten.map do |dep|
          # ::String, because a sibling Extract::Tree::String module exists.
          dep.is_a?(::String) && dep.start_with?("-") ? dep[1..-1] : dep
        end.uniq
      end

      # Token type tag for this node.
      def tt
        :formula
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
module Extract
  module Tree
    # Behaviour for the argument list of a function call.
    #
    # The including object must provide #formula_arg (the first argument) and
    # #rest, whose #elements each hold a later argument at index 1 of their
    # own #elements.
    # NOTE(review): index 0 is presumably the separator - confirm against
    # the grammar.
    module FormulaArgs
      # Evaluated values of all arguments, in source order.
      def excel_values
        [formula_arg.excel_value].concat(
          rest.elements.map { |pair| pair.elements[1].excel_value }
        )
      end

      # Flattened dependencies of all arguments, in source order.
      #
      # Raises a RuntimeError when one of the later arguments does not
      # respond to #deps.
      def deps
        gathered = rest.elements.each_with_object([formula_arg.deps]) do |pair, acc|
          arg = pair.elements[1]
          raise "no deps for arg #{arg.inspect}" unless arg.respond_to?(:deps)
          acc << arg.deps
        end

        gathered.flatten
      end
    end
  end
end
@@ -0,0 +1,106 @@
1
module Extract
  module Tree
    # Behaviour for arithmetic-expression nodes.
    #
    # The including object must provide #elements (its child nodes, or nil).
    # Evaluation flattens the subtree into a token list and hands it to
    # MathCalc.parse_eval.
    module Math
      # First available sub-expression accessor, tried in the order
      # math_exp, math_exp_full, num, primary.
      #
      # @return the sub-node, or nil when none of the accessors exist
      def get_math_exp
        [:math_exp, :math_exp_full, :num, :primary].each do |accessor|
          return public_send(accessor) if respond_to?(accessor)
        end
        nil
      end

      # Value of the expression; identical to #eval.
      #
      # The unreachable debugging code that used to follow the early
      # `return eval` has been removed.
      def excel_value
        eval
      end

      # Dependencies gathered from the subtree rooted at +start+.
      #
      # A child that responds to #deps is asked directly; any other child is
      # searched recursively through its own #elements.
      #
      # @param start node to search (defaults to this node)
      # @return [Array] flattened dependencies with nil/false entries dropped
      def deps(start = self)
        children = start.elements
        return [] unless children

        gathered = children.map do |child|
          child.respond_to?(:deps) ? child.deps : deps(child)
        end

        gathered.flatten.select { |dep| dep }
      end

      # Flat token list for the subtree rooted at +start+.
      #
      # * A node responding to `paren?` is evaluated on the spot and replaced
      #   by one synthetic token carrying the result as text.
      # * Children tagged :num, :operator, :cell or :formula are tokens.
      # * Children tagged :math contribute their own tokens.
      # * Untagged children are searched recursively.
      #
      # @param start node to tokenize (defaults to this node)
      # @return [Array] tokens in source order
      # @raise [RuntimeError] ("unknown") for an unrecognised tag
      def tokens(start = self)
        if start.respond_to?("paren?")
          inner = start.math_exp.eval
          return [OpenStruct.new(:text_value => inner.to_s)]
        end

        found = []
        children = start.elements
        return found unless children

        children.each do |el|
          unless el.respond_to?(:tt)
            found.concat(tokens(el))
            next
          end

          case el.tt
          when :num, :operator, :cell, :formula
            found << el
          when :math
            found.concat(el.tokens)
          else
            raise "unknown"
          end
        end

        found
      end

      # Evaluates this node's tokens with MathCalc.
      # Note that this shadows Kernel#eval on the including object.
      def eval
        MathCalc.parse_eval(tokens)
      end
    end

    # A parenthesised arithmetic expression: same behaviour as Math, plus the
    # `paren?` marker that #tokens looks for.
    module ParenMath
      include Math

      def paren?
        true
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module Extract
  module Tree
    # Behaviour for a numeric-literal node; the including object must
    # provide #text_value.
    module Num
      # The literal converted with String#to_f.
      def excel_value
        text_value.to_f
      end

      # A literal references no cells.
      def deps
        []
      end

      # Same result as #excel_value.
      def eval
        excel_value
      end

      # Token type tag for this node.
      def tt
        :num
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module Extract
  module Tree
    # Behaviour for an operator node: it only contributes its token tag.
    module Operator
      # Token type tag for this node.
      def tt
        :operator
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
module Extract
  module Tree
    # Behaviour for a rectangular cell range such as `A1:B2`.
    #
    # Instance methods expect the including object to provide the corner
    # nodes #a and #b (each answering #row and #col) and #find_sheet.
    module Range
      class << self
        # Names of every cell in the rectangle spanned by two corners.
        #
        # @param a top-left corner, answering #row and #col
        # @param b bottom-right corner, answering #row and #col
        # @return [Array<Array<String>>] one inner array per row, columns in
        #   order within it
        def cells_in_range_nodes(a, b)
          columns = column_span(a.col, b.col)
          (a.row..b.row).map do |row|
            columns.map { |col| "#{col}#{row}" }
          end
        end

        # Same as #cells_in_range_nodes, starting from text like "A1:B2".
        #
        # Multi-letter columns ("AA10") are accepted, and each endpoint must
        # match as a whole string (\A..\z), so text with embedded newlines is
        # rejected. Row numbers are converted to Integers, so "A01" is
        # reported as "A1".
        #
        # @param str [String] two references separated by ":"
        # @raise [RuntimeError] ("bad") when the text is not two well-formed
        #   references
        def cells_in_range(str)
          endpoints = str.split(":")
          raise "bad" unless endpoints.size == 2

          corners = endpoints.map do |ref|
            match = /\A([A-Z]+)([0-9]+)\z/.match(ref)
            raise "bad" unless match
            OpenStruct.new(:row => match[2].to_i, :col => match[1])
          end

          cells_in_range_nodes(*corners)
        end

        private

        # Column labels from +first+ to +last+ inclusive.
        #
        # A plain string range is used whenever it yields anything. Ruby
        # compares range endpoints lexically, so a span that grows in width
        # ("Z".."AB") comes back empty; for upper-case labels that case is
        # walked with String#succ instead.
        def column_span(first, last)
          span = (first..last).to_a
          return span unless span.empty?

          # ::String, because a sibling Extract::Tree::String module exists.
          letters_only = [first, last].all? do |col|
            col.is_a?(::String) && col =~ /\A[A-Z]+\z/
          end
          return span unless letters_only && first.length < last.length

          col = first
          until col.length > last.length
            span << col
            break if col == last
            col = col.succ
          end
          span
        end
      end

      # Legacy entry point kept for compatibility; same result as
      # #excel_value.
      def excel_value_old
        excel_value
      end

      # Sheet values for the range, as rows of columns.
      def excel_value
        Extract::Tree::Range.cells_in_range_nodes(a, b).map do |row|
          row.map { |ref| find_sheet[ref] }
        end
      end

      # Flat list of every cell name in the range, row by row.
      def deps
        Extract::Tree::Range.cells_in_range_nodes(a, b).flatten
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
module Extract
  module Tree
    # Behaviour for a string-literal node; the including object must provide
    # #text_value.
    module String
      # The text with its first and last characters dropped.
      # NOTE(review): presumably those are the surrounding quotes - confirm
      # against the grammar.
      def excel_value
        text_value.slice(1...-1)
      end

      # A literal references no cells.
      def deps
        []
      end
    end
  end
end
Binary file
data/samples/div.xlsx ADDED
Binary file
@@ -0,0 +1,6 @@
1
+ development:
2
+ sessions:
3
+ default:
4
+ database: mongoid_dev
5
+ hosts:
6
+ - localhost:27017
data/spec/deps_spec.rb ADDED
@@ -0,0 +1,48 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
# Defines one example asserting the dependencies of +cell+.
#
# When +cell+ is formula text (contains a line starting with "="), it is
# written to scratch cell X99 and that cell's dependencies are compared
# ignoring order and duplicates. Otherwise +cell+ is treated as a cell name
# and its dependencies must match exactly, in order.
#
# +deps+ may be nested; it is flattened and nil/false entries are dropped.
# Relies on a `sheet` helper being defined by the enclosing example group.
def should_have_deps(cell, *deps)
  it "#{cell} should have deps #{deps.inspect}" do
    expected = deps.flatten.select { |dep| dep }

    if cell !~ /^=/
      sheet.deps(cell).should == expected
    else
      sheet["X99"] = cell
      sheet.deps("X99").uniq.sort.should == expected.sort
    end
  end
end
13
+
14
describe "Deps" do
  # Fresh sheet per example: literals in A1..B3, a reference chain
  # C1 -> D1 -> E1 that ends at A1, and a two-cell sum in F1.
  let(:sheet) do
    Extract::Sheet.new.tap do |s|
      {"A1" => 1, "A2" => 2, "A3" => 3, "B1" => 4, "B2" => 5, "B3" => 6}.each_pair do |ref, value|
        s[ref] = value
      end

      s["C1"] = "=A1"
      s['D1'] = "=C1"
      s['E1'] = "=D1"

      s['F1'] = "=A1+A2"
    end
  end

  it 'smoke' do
    sheet['C1'].should == 1
  end

  # Each row is a cell name or formula text followed by its expected deps.
  [
    ["C1", "A1"],
    ["14", []],
    ["D1", "A1"],
    ["E1", "A1"],
    ["F1", "A1", "A2"],
    ["=A1+4", "A1"],
    ["=A1 = A2", "A1", "A2"],
    ["=DOUBLE(A1)", "A1"],
    ["=SUM(A1,A2)", "A1", "A2"],
    ["=SUM(A1:B2)", "A1", "A2", "B1", "B2"],
    ["=SUM(A1,C1)", "A1"],
    ["=SUM(2+3+A1,A2)", "A1", "A2"]
  ].each do |cell, *deps|
    should_have_deps(cell, *deps)
  end
end
@@ -0,0 +1,44 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
# Every example below is switched off by the `if false` guard; the guard is
# kept as found.
if false
  # Sample workbook that ships with the package, resolved relative to this
  # spec file rather than through a developer's home directory.
  div_sample = File.expand_path('../samples/div.xlsx', File.dirname(__FILE__))

  describe "Extract" do
    it 'smoke' do
      2.should == 2
    end

    it 'cash flow' do
      # NOTE(review): this workbook lives in a developer's home directory and
      # is not among the package's files; the example cannot run elsewhere.
      sheet = Extract::Sheet.load("/users/mharris717/documents/cashflow.xlsx")

      sheet["B42"].to_i.should == 14888
    end

    it 'dev' do
      sheet = Extract::Sheet.load(div_sample)

      sheet["B52"].to_i.should == 10
      # Reading each output cell checks that it evaluates without raising.
      %w(B38 B41 B51 B52).each do |c|
        sheet[c]
      end
    end
  end

  describe 'Dev Sheet' do
    let(:sheet_def) do
      Extract::SheetDefinition.load(div_sample, output_cells)
    end
    let(:output_cells) do
      %w(B38 B41 B51 B52)
    end

    it 'input cells' do
      exp = ["B30", "B29", "B30", "B31", "B46", "B47", "C35", "D35", "E35", "F35", "G35", "H35", "I35", "J35", "K35", "L35", "M35"].sort.uniq
      sheet_def.input_cells.sort.should == exp
    end
  end
end