statements 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +22 -0
  3. data/README.md +22 -0
  4. data/bin/statements +8 -0
  5. data/lib/html/css/main.css +115 -0
  6. data/lib/html/css/main.css.map +7 -0
  7. data/lib/html/css/main.scss +173 -0
  8. data/lib/html/index.html +94 -0
  9. data/lib/html/js/main.coffee +146 -0
  10. data/lib/html/js/main.js +174 -0
  11. data/lib/html/js/main.js.map +10 -0
  12. data/lib/html/vendor/bootstrap/css/bootstrap-theme.css +469 -0
  13. data/lib/html/vendor/bootstrap/css/bootstrap.css +6331 -0
  14. data/lib/html/vendor/bootstrap/fonts/glyphicons-halflings-regular.eot +0 -0
  15. data/lib/html/vendor/bootstrap/fonts/glyphicons-halflings-regular.svg +229 -0
  16. data/lib/html/vendor/bootstrap/fonts/glyphicons-halflings-regular.ttf +0 -0
  17. data/lib/html/vendor/bootstrap/fonts/glyphicons-halflings-regular.woff +0 -0
  18. data/lib/html/vendor/bootstrap/js/bootstrap.js +2320 -0
  19. data/lib/html/vendor/bootstrap-datepicker.js +1681 -0
  20. data/lib/html/vendor/datepicker3.css +786 -0
  21. data/lib/html/vendor/jquery-2.1.3.js +9205 -0
  22. data/lib/html/vendor/underscore.js +1416 -0
  23. data/lib/statements/cli.rb +17 -0
  24. data/lib/statements/database.rb +22 -0
  25. data/lib/statements/migrations/00_alpha.rb +43 -0
  26. data/lib/statements/models/account.rb +9 -0
  27. data/lib/statements/models/document.rb +29 -0
  28. data/lib/statements/models/transaction.rb +43 -0
  29. data/lib/statements/pdf_reader.rb +34 -0
  30. data/lib/statements/reader/common/st_george.rb +31 -0
  31. data/lib/statements/reader/st_george_credit_card.rb +53 -0
  32. data/lib/statements/reader/st_george_savings.rb +95 -0
  33. data/lib/statements/reader.rb +76 -0
  34. data/lib/statements/search.rb +48 -0
  35. data/lib/statements/server.rb +69 -0
  36. data/lib/statements/version.rb +3 -0
  37. data/lib/statements/views/footer.erb +6 -0
  38. data/lib/statements/views/row.erb +16 -0
  39. data/lib/statements/views/search.erb +25 -0
  40. data/lib/statements.rb +17 -0
  41. metadata +141 -0
@@ -0,0 +1,17 @@
1
+ require 'thin'
2
+
3
module Statements
  # Command-line entry point: prepares the database, imports the statements
  # found in a directory and then serves the web UI.
  class Cli

    # Opens `statements.sqlite3` inside +pwd+, scans +pwd+ for statement files
    # and starts a Thin server on port 57473.
    #
    # @param pwd [String] directory holding the statements and the database
    # @param argv [Array<String>] command-line arguments (not used here)
    def self.main(pwd, argv)
      Statements::Database.new "#{pwd}/statements.sqlite3"
      Statements::Reader.read_dir pwd
      # NOTE(review): '0.0.0.0' listens on every network interface, which
      # exposes the imported transactions to the local network. Consider
      # '127.0.0.1' if remote access is not intended.
      Thin::Server.start '0.0.0.0', 57473 do
        map('/q') { run Server.new }
        use Rack::Static, urls: [''], root: "#{ROOT}/lib/html", index: 'index.html'
        # A Rack application must respond to #call. Only Server instances do,
        # so the fallback application has to be an instance, not the class.
        run Server.new
      end
    end

  end
end
@@ -0,0 +1,22 @@
1
+ require 'active_record'
2
+
3
module Statements
  # Connects ActiveRecord to the SQLite database file and runs the bundled
  # migrations against it.
  class Database

    # @param path [String, nil] database file to use; the DB_PATH environment
    #   variable takes precedence when it is set
    def initialize(path = nil)
      @path = ENV.fetch('DB_PATH') { path }
      ActiveRecord::Base.establish_connection adapter: 'sqlite3', database: @path

      # SQL logging is opt-in: DB_LOG names the log destination.
      log_target = ENV['DB_LOG']
      ActiveRecord::Base.logger = Logger.new(log_target) if log_target

      ActiveRecord::Migrator.migrate migrations_dir
    end

    private

    # @return [String] absolute path of the directory holding the migrations
    def migrations_dir
      @migrations_dir ||= (Statements::ROOT + 'lib/statements/migrations').to_s
    end
  end
end
@@ -0,0 +1,43 @@
1
# Initial schema: scanned documents, the accounts they belong to and the
# transactions extracted from them.
class Alpha < ActiveRecord::Migration
  def change

    create_table :documents do |t|
      t.string :path
      t.string :md5, limit: 32

      t.timestamps null: true

      t.index :md5, unique: true
    end

    create_table :accounts do |t|
      t.string :name
      t.string :number

      t.timestamps null: true

      t.index [:name, :number], unique: true
    end

    create_table :transactions do |t|
      t.references :document
      t.references :account
      t.integer :document_line
      t.datetime :transacted_at, null: true
      t.datetime :posted_at, null: true
      t.string :description
      # precision is the total number of digits and scale the number of digits
      # after the decimal point, so money columns need precision > scale.
      t.decimal :amount, precision: 13, scale: 2
      t.decimal :balance, precision: 13, scale: 2
      t.decimal :foreign_amount, precision: 13, scale: 2
      t.string :foreign_currency, limit: 3
      t.string :colour, limit: 20, default: 'white'

      t.string :checksum, limit: 40

      t.timestamps null: true

      t.index [:document_id, :document_line], unique: true
    end

  end
end
@@ -0,0 +1,9 @@
1
# A bank account that transactions are attributed to.
class Account < ActiveRecord::Base
  # Serialises every account, ordered by name.
  #
  # @param options [Hash, nil] accepted for API compatibility; not used
  # @return [Array] the per-account representations from #as_json
  def self.as_json(options = nil)
    order('name asc').collect { |account| account.as_json }
  end

  # @param options [Hash, nil] accepted for API compatibility; not used
  # @return [Hash] only the id, name and number attributes
  def as_json(options = nil)
    slice(:id, :name, :number)
  end
end
@@ -0,0 +1,29 @@
1
+ require 'digest'
2
+
3
# A statement file on disk together with the transactions read from it.
class Document < ActiveRecord::Base
  has_many :transactions

  # Re-imports the document's transactions when the file content has changed.
  #
  # The new MD5 is stored even when no reader recognises the file, so such a
  # file is reported as unchanged on later scans until its content changes.
  #
  # @param base [Pathname, String, nil] directory that #path is relative to;
  #   when omitted, #path is used as given
  def scan(base: nil)
    # The default of nil used to crash on `nil + path`.
    path = base ? base + self.path : self.path
    md5 = Digest::MD5.file(path).hexdigest.downcase
    print "Scanning #{self.path} ... "
    if md5 == self.md5
      puts 'skipping (unchanged)'
    else
      import_transactions path
    end
    self.md5 = md5
    save!
  end

  private

  # Replaces this document's transactions with the ones a matching reader
  # extracts from +path+, or reports that the format is unknown.
  def import_transactions(path)
    reader = Statements::Reader.for_file(path)
    unless reader
      puts 'skipping (unknown format)'
      return
    end

    # Relation form of delete_all; the finder-style delete_all(conditions)
    # is not available in every ActiveRecord release.
    Transaction.where(document_id: id).delete_all if persisted?
    reader.transactions.each do |t|
      t.document = self
      # Skip transactions that another document has already contributed.
      t.save! unless Transaction.find_by('checksum = ? AND document_id != ?', t.checksum!, id || 0)
    end
    puts "added #{transactions.count} transaction(s)"
  end

end
@@ -0,0 +1,43 @@
1
+ require 'active_record'
2
+ require 'digest'
3
+
4
# A single line of a statement.
class Transaction < ActiveRecord::Base
  belongs_to :account
  belongs_to :document

  before_save :checksum!

  # Attaches the account with the given name and number, creating it if it
  # does not exist yet.
  def set_account(name, number)
    self.account = Account.find_or_create_by(
      name: name,
      number: number
    )
  end

  # Recomputes and stores the checksum.
  #
  # @return [String] the new checksum
  def checksum!
    self.checksum = calculate_checksum
  end

  def credit?
    amount > 0
  end

  def debit?
    amount < 0
  end

  private

  # SHA1 over the account id, both dates, the description, the amount and the
  # balance, joined with NUL bytes.
  def calculate_checksum
    parts = [
      account_id.to_s,
      checksum_date(transacted_at),
      checksum_date(posted_at),
      description,
      amount.to_s,
      balance.to_s
    ]
    Digest::SHA1.hexdigest parts.join("\0")
  end

  # Both date columns are nullable, so a missing date contributes an empty
  # string instead of raising NoMethodError on nil.
  def checksum_date(time)
    time ? time.strftime('%F') : ''
  end

end
@@ -0,0 +1,34 @@
1
+ require 'shellwords'
2
+
3
module Statements
  # Extracts the text of PDF files by running the external `pdftotext` tool.
  module PdfReader
    class << self

      # Reads +path+ page by page.
      #
      # Reading stops at the first page whose text is empty, which is how the
      # end of the document is detected.
      #
      # @param path [String] PDF file to read
      # @return [Array<String>] the text of each page, in order
      # @raise [RuntimeError] when `pdftotext` cannot be found
      def read(path)
        pages = []
        page_number = 0
        loop do
          text = read_page(path, page_number += 1)
          break if text.chomp.empty?
          pages << text
        end
        pages
      end

      private

      # Runs pdftotext for a single page and returns what it prints.
      #
      # The command is passed as an argument vector, so no shell is involved
      # and neither the executable path nor the file name needs escaping.
      def read_page(path, page)
        command = [
          pdftotext_path, '-enc', 'UTF-8', '-table', '-q',
          '-f', page.to_s, '-l', page.to_s,
          path.to_s, '/dev/stdout'
        ]
        IO.popen(command, &:read)
      end

      # Locates the pdftotext executable once and remembers the result.
      def pdftotext_path
        @pdftotext_path ||= begin
          found = `which pdftotext`.chomp
          raise 'Could not find `pdftotext`. Please install Xpdf from http://www.foolabs.com/xpdf/download.html' if found.empty?
          found
        end
      end

    end
  end
end
@@ -0,0 +1,31 @@
1
# Helpers shared by the St. George statement readers. The including class
# must provide #document (the full text) and #pages (the text of each page).
module StGeorge
  ABNS = %w[33007457141 92055513070].freeze

  # @return [Boolean] whether the document mentions St. George and carries
  #   one of the known ABNs
  def st_george?
    return false unless document =~ /\bSt\. *George\b/

    abn = document[/\bABN *((?:\d *){11})/, 1]
    !abn.nil? && ABNS.include?(abn.delete(' '))
  end

  # @return [Array<Integer>] the years of the first and last day of the period
  # @raise [ArgumentError] when the statement period cannot be found
  def years
    return @years if @years

    dates = period
    raise ArgumentError, 'Statement Period not found on the first page' unless dates
    @years = dates.map(&:year)
  end

  # Parses a date that carries no year and places it in the statement period:
  # it gets the period's first year, or the last year when that would put it
  # before the start of the period.
  #
  # @param str [#to_s] date text
  # @return [Time]
  def parse_date(str)
    date = change_year(Time.parse(str.to_s.strip), years.first)
    date = change_year(date, years.last) if date < period.first
    date
  end

  # @return [Array<Time>, nil] start and end of the statement period, read
  #   from the first page; nil when the page does not state one
  def period
    @period ||= begin
      found = %r{Statement Period\s+(\d\d/\d\d/\d{4})\s+to\s+(\d\d/\d\d/\d{4})}.match(pages.first)
      found && [Time.parse(found[1]), Time.parse(found[2])]
    end
  end

  # @return [String] the account number with runs of whitespace collapsed
  # @raise [ArgumentError] when the document contains no account number
  def account_number
    @account_number ||= begin
      raw = document[/Account Number ([\d ]+)/, 1]
      raise ArgumentError, 'Account Number not found in document' unless raw
      raw.strip.gsub(/\s+/, ' ')
    end
  end

  private

  # Midnight on the same month and day of +time+, in +year+.
  def change_year(time, year)
    Time.new year, time.month, time.day
  end
end
@@ -0,0 +1,53 @@
1
+ require_relative 'common/st_george'
2
+ require 'time'
3
+ require 'bigdecimal'
4
+
5
module Statements
  class Reader
    # Reader for St. George credit card statements.
    class StGeorgeCreditCard < self
      include StGeorge

      # A St. George document whose first page mentions a credit limit.
      def valid?
        st_george? && pages.first.include?('Credit Limit')
      end

      # Captures, in order: two day/month dates, the description, two dollar
      # figures (each optionally followed by "CR") and an optional
      # continuation line holding a figure and a three-letter code.
      def self.cell_pattern
        @cell_pattern ||= %r`^
          \s* (\d{1,2}\s+[A-Z][a-z]{2})
          \s* (\d{1,2}\s+[A-Z][a-z]{2})
          \s* (.+?)
          \s* (\$[\d,]+\.\d\d(?:\s+CR)?)
          \s* (\$[\d,]+\.\d\d(?:\s+CR)?)
          \s* (\n\s+\d+\.\d\d\s+[A-Z]{3})?
          \s*$
        `x
      end

      # Fills +tx+ from the captures of one cell_pattern match.
      def parse_cells(cells, tx)
        posted, transacted, text, amount_cell, balance_cell, foreign = cells

        # December dates get the period's first year, all others its last.
        { posted_at: posted, transacted_at: transacted }.each do |attribute, raw|
          parsed = Time.parse(raw)
          year = parsed.month == 12 ? years.first : years.last
          tx[attribute] = Time.new(year, parsed.month, parsed.day)
        end

        tx.description = text

        # Figures are stored negative unless the cell is marked "CR".
        { amount: amount_cell, balance: balance_cell }.each do |attribute, raw|
          value = BigDecimal(raw[/[\d,]+\.\d+/].delete(','))
          value = -value unless raw.end_with?('CR')
          tx[attribute] = value
        end

        if foreign
          # The last three characters are the currency code; everything
          # before the preceding separator is the amount.
          tx.foreign_amount = BigDecimal(foreign[0..-5])
          tx.foreign_amount *= -1 if tx.amount < 0
          tx.foreign_currency = foreign[-3..-1]
        end

        tx.set_account account_name, account_number
      end

      # The text preceding the word "Statement" on the first line that ends
      # with that word.
      def account_name
        @account_name ||= document[/^\s*(.+)\s*Statement\s*$/, 1].strip
      end

    end
  end
end
@@ -0,0 +1,95 @@
1
+ require_relative 'common/st_george'
2
+ require 'time'
3
+ require 'bigdecimal'
4
+
5
module Statements
  class Reader
    # Reader for St. George savings account statements.
    class StGeorgeSavings < self
      include StGeorge

      # Descriptions whose first line ends with the date (or date and time)
      # on which the transaction actually took place.
      DATED_DESCRIPTIONS = [
        'INTERNET WITHDRAWAL',
        'INTERNET DEPOSIT',
        'ATM DEPOSIT',
        'ATM WITHDRAWAL',
        'VISA PURCHASE',
        'VISA PURCHASE O/SEAS',
        'EFTPOS PURCHASE'
      ].freeze

      # First description line of a dated transaction: the description
      # followed by either dd/mm/yy or ddMMM hh:mm.
      DATED_LINE = %r`^(.+?) (?:(\d\d)/(\d\d)/\d\d|(\d\d\w{3}) \d\d:\d\d)$`

      # Trailing currency code and amount of an overseas purchase.
      FOREIGN_AMOUNT = /\b([A-Z]{3}) ([\d,]+\.\d\d)$/

      # A St. George document whose first page shows BSB number 112-879.
      def valid?
        st_george? && pages.first =~ /BSB\s+Number\s+112-879/
      end

      # Captures, in order: day, month, first description line, amount,
      # balance and any following lines that start with a space.
      def self.cell_pattern
        @cell_pattern ||= %r`^
          (\d\d\s+)
          ([A-Z]{3}\s+)
          (.+?)
          ([\d,]+\.\d\d) \s+
          ([\d,]+\.\d\d) \s*
          $
          ((?:
            \n+\040[^\r\n]+
          )*)
        `x
      end

      # Fills +tx+ from the captures of one cell_pattern match.
      #
      # Example cells:
      #   ['17 ', 'NOV ', 'LINE 1 ', '12,345.67', '50.00', "\n LINE 2\n Line 3"]
      def parse_cells(cells, tx, page_index)
        day, month, first_line, amount, balance, continuation = cells
        debit_factor = is_debit?(cells, page_index) ? -1 : 1

        tx.posted_at = parse_date("#{day}#{month}")
        tx.amount = BigDecimal(amount.delete(',')) * debit_factor
        tx.balance = BigDecimal(balance.delete(','))

        # TODO: negative balances

        lines = description_lines(first_line, continuation)
        tx.description = lines.join("\n")

        apply_dated_description tx, lines, debit_factor
        tx.transacted_at ||= tx.posted_at

        tx.set_account account_name, account_number
      end

      # The first three captures run contiguously from the start of the line,
      # so their combined length is the column where the amount begins. The
      # amount is a debit when its centre lies left of the page's threshold.
      def is_debit?(cells, page_index)
        amount_offset = cells[0..2].map(&:length).inject(:+)
        amount_center = amount_offset + cells[3].length / 2
        amount_center < credit_threshold(page_index)
      end

      # Memoised per page, because the header may sit at a different column
      # on each page.
      def credit_threshold(page)
        (@credit_thresholds ||= {})[page] ||= find_credit_threshold(page)
      end

      # Column halfway through the "Debit ... Credit" part of the page's
      # table header.
      #
      # @raise [ArgumentError] when the page has no such header
      def find_credit_threshold(page)
        header = pages[page].scan(/^(Date\s+Transaction\s+De\w+\s+)(Debit\s+Credit)/).first
        raise ArgumentError, "Debit/Credit column header not found on page #{page + 1}" unless header
        lengths = header.map(&:length)
        lengths.first + lengths.last / 2
      end

      # The text following "Statement of Account".
      def account_name
        @account_name ||= document[/^\s*Statement\s+of\s+Account\s*((?:\S\s{0,4})+)/, 1].strip
      end

      private

      # Description lines with whitespace collapsed and without the
      # "SUBTOTAL CARRIED FORWARD TO NEXT PAGE" marker.
      def description_lines(first_line, continuation)
        lines = [first_line.strip]
        lines.concat continuation.strip.split(/\s*\n\s*/) if continuation
        lines.map { |line| line.gsub(/\s+/, ' ') }
             .reject { |line| line.delete(' ').start_with?('SUBTOTALCARRIEDFORWARDTONEXTPAGE') }
      end

      # Sets transacted_at, plus the foreign currency and amount of overseas
      # purchases, when the first line is one of the DATED_DESCRIPTIONS.
      def apply_dated_description(tx, lines, debit_factor)
        dated = DATED_LINE.match(lines.first)
        return unless dated && DATED_DESCRIPTIONS.include?(dated[1])

        # dd/mm dates go through an ISO string; parse_date replaces the
        # placeholder year with one from the statement period.
        tx.transacted_at = dated[2] ? parse_date("2000-#{dated[3]}-#{dated[2]}") : parse_date(dated[4])
        return unless dated[1].end_with?('O/SEAS')

        foreign = FOREIGN_AMOUNT.match(lines.last)
        return unless foreign
        tx.foreign_currency = foreign[1]
        tx.foreign_amount = BigDecimal(foreign[2].delete(',')) * debit_factor
      end

    end
  end
end
@@ -0,0 +1,76 @@
1
+ require 'forwardable'
2
+
3
module Statements
  # Base class of the statement readers. A reader wraps the pages of one
  # document; subclasses supply .cell_pattern, #valid? and #parse_cells.
  class Reader
    extend Forwardable

    class << self
      attr_reader :classes
    end

    # Load every reader implementation once and remember the subclasses.
    unless classes
      reader_glob = Statements::ROOT.join('lib/statements/reader/*.rb')
      Dir[reader_glob].each { |file| require file }
      @classes = constants
                 .map { |name| const_get(name) }
                 .select { |const| const.is_a?(Class) && const < Reader }
    end

    # Finds the first reader class that accepts the file.
    #
    # PDF files are read through PdfReader; any other file is split into
    # pages on runs of five or more dashes.
    #
    # @param file [#to_s] path of the file
    # @return [Reader, nil] a reader for which #valid? holds, or nil
    def self.for_file(file)
      file = file.to_s
      pages = if file =~ /\.pdf$/i
                PdfReader.read(file)
              else
                File.read(file).split(/-{5,}/)
              end
      classes.lazy.map { |klass| klass.new(pages) }.find(&:valid?)
    end

    # Scans every .pdf and .txt file below +dir+. A failure while scanning
    # one document is printed and does not stop the remaining documents.
    def self.read_dir(dir)
      root = Pathname(dir).realpath
      Dir[root.join('**/*.{pdf,txt}')].each do |found|
        relative = Pathname(found).relative_path_from(root)
        begin
          Document.find_or_initialize_by(path: relative.to_s).scan(base: root)
        rescue => error
          trace = error.backtrace.join("\n ")
          puts "error: #{error.class.name} #{error.message}\n #{trace}"
        end
      end
    end

    attr_reader :pages, :document

    delegate [:include?, :scan] => :document

    # @param pages [Array<String>] the text of each page
    def initialize(pages)
      @pages = pages
      @document = pages.join("\n").freeze
    end

    def cell_pattern
      self.class.cell_pattern
    end

    def self.cell_pattern
      raise NotImplementedError
    end

    def transactions
      @transactions ||= search_for_transactions
    end

    # Builds one unsaved Transaction per cell_pattern match, numbering them
    # consecutively across all pages.
    def search_for_transactions
      line_number = 0
      pages.each_with_index.flat_map do |page, page_index|
        page.scan(cell_pattern).map do |cells|
          transaction = Transaction.new(document_line: line_number += 1)
          available = [cells, transaction, page_index]
          # Subclasses declare parse_cells with two or three parameters;
          # pass only as many arguments as the method takes.
          arity = method(:parse_cells).arity
          parse_cells(*available[0..(arity - 1)])
          transaction
        end
      end
    end

  end
end
@@ -0,0 +1,48 @@
1
+ require 'time'
2
+
3
module Statements
  # Turns the search form's input into a transaction query.
  class Search

    # Ordering used when the requested one is missing or not acceptable.
    DEFAULT_ORDER = 'posted_at asc'.freeze

    # One ORDER BY term: a bare column name with an optional direction.
    ORDER_TERM = /\A(\w+)(?:\s+(?:asc|desc))?\z/i

    attr_reader :input

    # @param input [Hash] parsed request body; reads the keys 'order',
    #   'accounts', 'date_start', 'date_end', 'type', 'colours' and 'search'
    def initialize(input)
      @input = input
    end

    def transactions
      @transactions ||= query.all
    end

    def debits
      transactions.select(&:debit?)
    end

    def credits
      transactions.select(&:credit?)
    end

    # Sum of the amounts in one of the collections above.
    #
    # @param collection [Symbol, String] :transactions, :debits or :credits
    def total(collection = :transactions)
      public_send(collection).inject(0) { |sum, t| sum + t.amount }
    end

    private

    def query
      query = Transaction.order(safe_order)
      query = query.where(account_id: input['accounts'])
      query = query.where('posted_at > ? and posted_at < ?',
                          Time.parse(input['date_start']),
                          Time.parse(input['date_end']))
      query = query.where('amount < 0') if input['type'] == 'debits'
      query = query.where('amount > 0') if input['type'] == 'credits'
      query = query.where(colour: input['colours'])
      text = input['search'].to_s.strip.downcase
      unless text.empty?
        words = text.split(/\s+/)
        query = query.where('lower(description) like ?', "%#{words.join '%'}%")
      end
      query
    end

    # The order clause arrives from the client and ends up in raw SQL, so it
    # is only used when every comma-separated term is a transaction column
    # with an optional asc/desc; anything else falls back to DEFAULT_ORDER.
    def safe_order
      terms = input['order'].to_s.split(',').map(&:strip)
      return DEFAULT_ORDER if terms.empty?
      return DEFAULT_ORDER unless terms.all? { |term| order_term_allowed?(term) }
      terms.join(', ')
    end

    def order_term_allowed?(term)
      found = ORDER_TERM.match(term)
      !found.nil? && Transaction.column_names.include?(found[1].downcase)
    end

  end
end
@@ -0,0 +1,69 @@
1
+ require 'json'
2
+ require 'erb'
3
+
4
module Statements
  # Rack application behind the web UI. A request is dispatched to the public
  # method named "<verb>_<first path segment>" (dots become underscores),
  # e.g. GET /accounts.js is handled by #get_accounts_js.
  class Server

    # Only handlers whose name starts with one of these verbs can be reached.
    HTTP_VERBS = %w[get post put patch delete head options].freeze

    def initialize

    end

    # @param env [Hash] Rack environment
    # @return [Array] Rack response triplet; 404 when no handler matches
    def call(env)
      request = Rack::Request.new(env)
      verb = request.request_method.to_s.downcase
      # PATH_INFO may be empty (mount point requested without a trailing
      # slash), so strip the leading slash without assuming it is there.
      path = request.path_info.to_s.sub(%r{\A/}, '').split('/')
      handler_name = "#{verb}_#{path.first || 'index'}".gsub('.', '_')
      args = [request] + path.drop(1)
      handler = handler_for(verb, handler_name)
      if handler && handler.arity == args.length
        handler.call(*args)
      else
        not_found
      end
    end

    # Builds a 200 response.
    #
    # @param type [String] value of the Content-Type header
    # @param str [String] response body
    # noinspection RubyStringKeysInHashInspection
    def serve(type, str)
      # Content-Length counts bytes; the character count is too small as soon
      # as the body contains multi-byte characters.
      [200, {'Content-Type' => type, 'Content-Length' => str.bytesize.to_s}, [str]]
    end

    def json(data)
      serve 'application/json', JSON.generate(data, quirks_mode: true)
    end

    def js(script)
      serve 'application/x-javascript', script
    end

    def html(html)
      serve 'text/html; charset=UTF-8', html
    end

    def get_accounts_js(request)
      js "window.accounts = #{Account.to_json}"
    end

    # Renders views/<template>.erb with +obj+ (or the Server class itself) as
    # self. Compiled templates are cached per name.
    def self.render(template, obj = nil)
      @templates ||= {}
      @templates[template] ||= ERB.new(File.read(File.expand_path("../views/#{template}.erb", __FILE__)))
      @templates[template].result((obj || self).instance_eval { binding })
    end

    def post_search_html(request)
      html self.class.render 'search', Search.new(JSON.parse request.body.read)
    end

    # Stores a new colour on a transaction.
    #
    # @return [Array] JSON success response, or a 400 response when the body
    #   is not a JSON object, the transaction does not exist or saving fails
    def post_colour_json(request)
      input = begin
        JSON.parse(request.body.read)
      rescue JSON::ParserError
        nil
      end
      return bad_request unless input.is_a?(Hash)

      transaction = Transaction.find_by(id: input['id'])
      return bad_request unless transaction

      transaction.colour = input['colour']
      return bad_request unless transaction.save
      json success: true
    end

    private

    # The Method object for +name+, or nil unless the request verb is a known
    # HTTP verb and +name+ is a public method. Requiring the verb prefix
    # keeps inherited methods such as instance_variable_get out of reach.
    def handler_for(verb, name)
      return nil unless HTTP_VERBS.include?(verb) && respond_to?(name)
      method(name)
    end

    def not_found
      [404, {}, ['Not found']]
    end

    # noinspection RubyStringKeysInHashInspection
    def bad_request
      [400, {'Content-Type' => 'text/plain'}, ['Bad request']]
    end

  end
end
@@ -0,0 +1,3 @@
1
module Statements
  # Version of the gem. Frozen so the constant cannot be mutated in place.
  VERSION = '0.1.0'.freeze
end
@@ -0,0 +1,6 @@
1
<tr>
  <th></th>
  <th><%= [ActiveSupport::NumberHelper.number_to_delimited(count), name.capitalize[0..-2] + (count == 1 ? '' : 's')].join(' ') %></th>
  <td><%= ActiveSupport::NumberHelper.number_to_currency(total.abs, unit: '') %></td>
  <th></th>
</tr>
@@ -0,0 +1,16 @@
1
<tr class="<%= credit? ? 'credit' : 'debit' %> colour-<%= ERB::Util.html_escape colour %>" data-id="<%= id %>">
  <td class="date">
    <%= posted_at.strftime '%-d %b %Y' %>
  </td>
  <td class="description">
    <%# colour comes from the client and description from imported files, so both are escaped before being written into the page. %>
    <% desc = description.to_s.lines.map { |line| ERB::Util.html_escape line } %>
    <p><%= desc[0] %></p>
    <% if desc.length > 1 %>
      <p class="small"><%= desc[1..-1].join '<br/>' %></p>
    <% end %>
  </td>
  <td class="amount"><%= ActiveSupport::NumberHelper.number_to_currency amount.abs, unit: '' %></td>
  <td class="colour">
    <a href="javascript:" class="picker"></a>
  </td>
</tr>
@@ -0,0 +1,25 @@
1
<table class="table table-condensed">
  <thead>
    <tr>
      <th>Posted</th>
      <th>Description</th>
      <th class="amount">Amount</th>
      <th><!-- Tags --></th>
    </tr>
  </thead>
  <tbody>
    <%= transactions.map { |transaction| Statements::Server.render('row', transaction) }.join %>
  </tbody>
  <tfoot>
    <%= %w[debits credits transactions].map { |kind|
          summary = OpenStruct.new(name: kind, count: __send__(kind).count, total: total(kind))
          Statements::Server.render('footer', summary)
        }.join %>
  </tfoot>
</table>
<!-- fix for long response bug -->
data/lib/statements.rb ADDED
@@ -0,0 +1,17 @@
1
+ require 'pathname'
2
+
3
module Statements
  # Root directory of the gem: two levels above this file.
  ROOT = Pathname.new(File.expand_path(File.join('..', '..'), __FILE__))
end
6
+
7
+ require 'statements/version'
8
+ require 'statements/cli'
9
+ require 'statements/reader'
10
+ require 'statements/database'
11
+ require 'statements/pdf_reader'
12
+ require 'statements/server'
13
+ require 'statements/search'
14
+
15
+ require 'statements/models/transaction'
16
+ require 'statements/models/account'
17
+ require 'statements/models/document'