bank_statement_parser 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2a965f6dc78b631995b0222f5a04602511720ca5
4
- data.tar.gz: 662d867bdf2754e22a552824131abbdb5c800ba4
3
+ metadata.gz: fda2797823d21a6f4f8b438c59cd7eb1cfd994a0
4
+ data.tar.gz: d98654067e0e2cf05300324b8b73066ea1aad925
5
5
  SHA512:
6
- metadata.gz: 58245edba68a4434c878e5ec6afbf72e4002cc7d0f79a63a6da6c34f7f04dfb7615054fd7e9f8b4004580f274727cc1899259235236ca72c3f5959967f758ca4
7
- data.tar.gz: d97413e5f9611567f75b01a74ebccb38fcaf1d0a3f6f70e6248d12801a37e5120dd8a85ad59b8749bfb7d03b4c95e25cf5061d138fa476637720e8f2588949f6
6
+ metadata.gz: 670ac0361e50588072428059ec7c2bc20d6a53e310ed9eed4a91f4fed392acec6e5fc9627f07009b24f6d0a7b126b5e2ef15e40ebb98131b04bdd86061b3b87a
7
+ data.tar.gz: 8308156691543687651f998b3863bba3db5e4d875b1da7090ded53762d47efe0819250efa311a9af091f9e5516b5c6c345663cef47cce9db7da6d761a4d39378
@@ -19,10 +19,14 @@ module BankStatementParser
19
19
 
20
20
  # Base class for statement parsers
21
21
  #
22
- # Subclasses must implement the following instance methods
22
+ # Subclasses **must** implement the following instance methods
23
23
  #
24
- # * void reset()
25
24
  # * bool handle_line(String line)
25
+ #
26
+ # Subclasses *may* override the following instance methods, but **must**
27
+ # remember to call the base class method from the override
28
+ #
29
+ # * void reset()
26
30
  class Base
27
31
 
28
32
  require 'fileutils.rb'
@@ -40,8 +44,8 @@ module BankStatementParser
40
44
 
41
45
  reset
42
46
 
43
- # Grab the full text file content, and re-encode to ASCII
44
- full_text = ascii_filter(File.read(path))
47
+ # Grab the full text file content (utf-8)
48
+ full_text = File.read(path)
45
49
 
46
50
  # Process each line in turn
47
51
  full_text.split("\n").each do |line|
@@ -69,38 +73,5 @@ module BankStatementParser
69
73
  @records = []
70
74
  end
71
75
 
72
- private
73
-
74
- # Filter the specified text, re-encoding to ASCII
75
- def self.ascii_filter text
76
- rv = text
77
-
78
- # Squash some Unicode character categories
79
- #
80
- # {Zs} necessary to match statement date line
81
- # {Pc} necessary to match statement record lines
82
- rv.gsub!(/[\p{Zs}\p{Pc}]/, " ")
83
-
84
- # Replace Unicode soft hyphens
85
- rv.gsub!(/\u00ad/, "-")
86
-
87
- # Replace... well, who knows just *what* this is...
88
- rv.gsub!(/\u0a0c/, " ")
89
-
90
- # Re-encode to ASCII
91
- encoding_options = {
92
- invalid: :replace, # Replace invalid byte sequences
93
- undef: :replace, # Replace anything not defined in ASCII
94
- replace: '', # Use a blank for those replacements
95
- universal_newline: true # Always break lines with \n
96
- }
97
- rv = rv.encode(Encoding.find('US-ASCII'), encoding_options)
98
-
99
- # Replace ASCII form feed characters
100
- rv.gsub!(/\f/, "\n")
101
-
102
- rv
103
- end
104
-
105
76
  end
106
77
  end
@@ -17,6 +17,7 @@
17
17
 
18
18
  require 'date'
19
19
  require 'bank_statement_parser/base'
20
+ require 'bank_statement_parser/utils'
20
21
  module BankStatementParser
21
22
 
22
23
  # Parser for HSBC bank statements
@@ -27,6 +28,9 @@ module BankStatementParser
27
28
  # Returns true if parsing should continue; false to terminate the parser
28
29
  def handle_line line
29
30
 
31
+ # Re-encode line to ASCII
32
+ line = Utils.ascii_filter(line)
33
+
30
34
  # Skip blank lines
31
35
  return true if line =~ /\A\s*\z/
32
36
 
@@ -0,0 +1,55 @@
1
+ # Copyright 2015 Simon Dawson <spdawson@gmail.com>
2
+
3
+ # This file is part of bank_statement_parser.
4
+ #
5
+ # bank_statement_parser is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # bank_statement_parser is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with bank_statement_parser. If not, see <http://www.gnu.org/licenses/>.
17
+
18
+ module BankStatementParser
19
+
20
+ # Utilities
21
class Utils

  # Transcoding options used when re-encoding statement text to US-ASCII
  ASCII_ENCODING_OPTIONS = {
    invalid: :replace,      # Replace invalid byte sequences
    undef: :replace,        # Replace anything not defined in ASCII
    replace: '',            # Use a blank for those replacements
    universal_newline: true # Always break lines with \n
  }.freeze

  # Filter the specified text, re-encoding to ASCII
  #
  # The argument is never modified (so frozen strings are accepted); a new
  # US-ASCII encoded string is returned.
  #
  # * text - the String to filter
  #
  # Returns a new String containing only ASCII characters
  def self.ascii_filter text
    # Squash some Unicode character categories
    #
    # {Zs} necessary to match statement date line
    # {Pc} necessary to match statement record lines
    #
    # Non-destructive gsub gives us a private copy to work on, so the
    # caller's string is left untouched
    rv = text.gsub(/[\p{Zs}\p{Pc}]/, " ")

    # Replace Unicode soft hyphens
    rv.gsub!(/\u00ad/, "-")

    # Replace U+0A0C with a space; the original author did not know what
    # this character is, so the substitution is kept unchanged
    rv.gsub!(/\u0a0c/, " ")

    # Re-encode to ASCII
    #
    # The options must be passed as keywords: from Ruby 3.0 a positional
    # Hash is no longer treated as keyword options by String#encode
    rv = rv.encode(Encoding::US_ASCII, **ASCII_ENCODING_OPTIONS)

    # Replace ASCII form feed characters
    rv.gsub(/\f/, "\n")
  end

end
55
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bank_statement_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simon Dawson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-25 00:00:00.000000000 Z
11
+ date: 2015-03-26 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A gem for parsing bank statements
14
14
  email: spdawson@gmail.com
@@ -20,6 +20,7 @@ files:
20
20
  - lib/bank_statement_parser/base.rb
21
21
  - lib/bank_statement_parser/hsbc.rb
22
22
  - lib/bank_statement_parser/statement_record.rb
23
+ - lib/bank_statement_parser/utils.rb
23
24
  homepage: http://rubygems.org/gems/bank_statement_parser
24
25
  licenses:
25
26
  - GPLv3