bank_statement_parser 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/bank_statement_parser/base.rb +8 -37
- data/lib/bank_statement_parser/hsbc.rb +4 -0
- data/lib/bank_statement_parser/utils.rb +55 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fda2797823d21a6f4f8b438c59cd7eb1cfd994a0
|
4
|
+
data.tar.gz: d98654067e0e2cf05300324b8b73066ea1aad925
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 670ac0361e50588072428059ec7c2bc20d6a53e310ed9eed4a91f4fed392acec6e5fc9627f07009b24f6d0a7b126b5e2ef15e40ebb98131b04bdd86061b3b87a
|
7
|
+
data.tar.gz: 8308156691543687651f998b3863bba3db5e4d875b1da7090ded53762d47efe0819250efa311a9af091f9e5516b5c6c345663cef47cce9db7da6d761a4d39378
|
@@ -19,10 +19,14 @@ module BankStatementParser
|
|
19
19
|
|
20
20
|
# Base class for statement parsers
|
21
21
|
#
|
22
|
-
# Subclasses must implement the following instance methods
|
22
|
+
# Subclasses **must** implement the following instance methods
|
23
23
|
#
|
24
|
-
# * void reset()
|
25
24
|
# * bool handle_line(String line)
|
25
|
+
#
|
26
|
+
# Subclasses *may* override the following instance methods, but **must**
|
27
|
+
# remember to call the base class method from the override
|
28
|
+
#
|
29
|
+
# * void reset()
|
26
30
|
class Base
|
27
31
|
|
28
32
|
require 'fileutils.rb'
|
@@ -40,8 +44,8 @@ module BankStatementParser
|
|
40
44
|
|
41
45
|
reset
|
42
46
|
|
43
|
-
# Grab the full text file content
|
44
|
-
full_text =
|
47
|
+
# Grab the full text file content (utf-8)
|
48
|
+
full_text = File.read(path)
|
45
49
|
|
46
50
|
# Process each line in turn
|
47
51
|
full_text.split("\n").each do |line|
|
@@ -69,38 +73,5 @@ module BankStatementParser
|
|
69
73
|
@records = []
|
70
74
|
end
|
71
75
|
|
72
|
-
private
|
73
|
-
|
74
|
-
# Filter the specified text, re-encoding to ASCII
|
75
|
-
def self.ascii_filter text
|
76
|
-
rv = text
|
77
|
-
|
78
|
-
# Squash some Unicode character categories
|
79
|
-
#
|
80
|
-
# {Zs} necessary to match statement date line
|
81
|
-
# {Pc} necessary to match statement record lines
|
82
|
-
rv.gsub!(/[\p{Zs}\p{Pc}]/, " ")
|
83
|
-
|
84
|
-
# Replace Unicode soft hyphens
|
85
|
-
rv.gsub!(/\u00ad/, "-")
|
86
|
-
|
87
|
-
# Replace... well, who knows just *what* this is...
|
88
|
-
rv.gsub!(/\u0a0c/, " ")
|
89
|
-
|
90
|
-
# Re-encode to ASCII
|
91
|
-
encoding_options = {
|
92
|
-
invalid: :replace, # Replace invalid byte sequences
|
93
|
-
undef: :replace, # Replace anything not defined in ASCII
|
94
|
-
replace: '', # Use a blank for those replacements
|
95
|
-
universal_newline: true # Always break lines with \n
|
96
|
-
}
|
97
|
-
rv = rv.encode(Encoding.find('US-ASCII'), encoding_options)
|
98
|
-
|
99
|
-
# Replace ASCII form feed characters
|
100
|
-
rv.gsub!(/\f/, "\n")
|
101
|
-
|
102
|
-
rv
|
103
|
-
end
|
104
|
-
|
105
76
|
end
|
106
77
|
end
|
@@ -17,6 +17,7 @@
|
|
17
17
|
|
18
18
|
require 'date'
|
19
19
|
require 'bank_statement_parser/base'
|
20
|
+
require 'bank_statement_parser/utils'
|
20
21
|
module BankStatementParser
|
21
22
|
|
22
23
|
# Parser for HSBC bank statements
|
@@ -27,6 +28,9 @@ module BankStatementParser
|
|
27
28
|
# Returns true if parsing should continue; false to terminate the parser
|
28
29
|
def handle_line line
|
29
30
|
|
31
|
+
# Re-encode line to ASCII
|
32
|
+
line = Utils.ascii_filter(line)
|
33
|
+
|
30
34
|
# Skip blank lines
|
31
35
|
return true if line =~ /\A\s*\z/
|
32
36
|
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# Copyright 2015 Simon Dawson <spdawson@gmail.com>
|
2
|
+
|
3
|
+
# This file is part of bank_statement_parser.
|
4
|
+
#
|
5
|
+
# bank_statement_parser is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# bank_statement_parser is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with bank_statement_parser. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
|
18
|
+
module BankStatementParser
|
19
|
+
|
20
|
+
# Utilities
|
21
|
+
class Utils

  # Options used when transcoding filtered text to US-ASCII
  ASCII_ENCODING_OPTIONS = {
    invalid: :replace,      # Replace invalid byte sequences
    undef: :replace,        # Replace anything not defined in ASCII
    replace: '',            # Use a blank for those replacements
    universal_newline: true # Always break lines with \n
  }.freeze
  private_constant :ASCII_ENCODING_OPTIONS

  # Filter the specified text, re-encoding to ASCII
  #
  # Returns a new String; the +text+ argument itself is never modified,
  # so frozen strings and strings the caller still needs are safe to pass.
  #
  # * +text+ - the String to filter
  def self.ascii_filter text
    # Squash some Unicode character categories
    #
    # {Zs} necessary to match statement date line
    # {Pc} necessary to match statement record lines
    #
    # Non-destructive gsub is used throughout: the destructive gsub! form
    # would alter the caller's string in place (and raise on frozen input)
    rv = text.gsub(/[\p{Zs}\p{Pc}]/, " ")

    # Replace Unicode soft hyphens
    rv = rv.gsub(/\u00ad/, "-")

    # Replace U+0A0C, whose origin in the statement text is unknown
    rv = rv.gsub(/\u0a0c/, " ")

    # Re-encode to ASCII; the options are splatted so that they are passed
    # as keyword arguments rather than as a positional Hash
    rv = rv.encode(Encoding::US_ASCII, **ASCII_ENCODING_OPTIONS)

    # Replace ASCII form feed characters
    rv.gsub(/\f/, "\n")
  end

end
|
55
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bank_statement_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simon Dawson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-26 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A gem for parsing bank statements
|
14
14
|
email: spdawson@gmail.com
|
@@ -20,6 +20,7 @@ files:
|
|
20
20
|
- lib/bank_statement_parser/base.rb
|
21
21
|
- lib/bank_statement_parser/hsbc.rb
|
22
22
|
- lib/bank_statement_parser/statement_record.rb
|
23
|
+
- lib/bank_statement_parser/utils.rb
|
23
24
|
homepage: http://rubygems.org/gems/bank_statement_parser
|
24
25
|
licenses:
|
25
26
|
- GPLv3
|