bank_statement_parser 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/bank_statement_parser/base.rb +35 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 63f514f97e4ea127e8f5f207466ae59371eefc23
|
4
|
+
data.tar.gz: d09dbf8a966d8def416d9597db28e0949a02c106
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 00ce8ead80e9c2da7ea606f46e1d3ea983e4a7e0d950b36e3774db3f394ebcfae83bff3a60d60619b270954c2e3c447b225ec5579fb726661de312a846ad7fe5
|
7
|
+
data.tar.gz: bafa6d5ada99cc93a2e4d646158c244f15d0dfee6e214ce25a57f3de602531100257a8873261d19ae6c6df505d07dd93341558512fb3c5e42b64a3fcc5cea441
|
@@ -23,8 +23,8 @@ module BankStatementParser
|
|
23
23
|
|
24
24
|
reset
|
25
25
|
|
26
|
-
# Grab the full text file content
|
27
|
-
full_text = File.read(path)
|
26
|
+
# Grab the full text file content, and re-encode to ASCII
|
27
|
+
full_text = ascii_filter(File.read(path))
|
28
28
|
|
29
29
|
# Process each line in turn
|
30
30
|
full_text.split("\n").each do |line|
|
@@ -52,5 +52,38 @@ module BankStatementParser
|
|
52
52
|
@records = []
|
53
53
|
end
|
54
54
|
|
55
|
+
private
|
56
|
+
|
57
|
+
# Filter the specified text, re-encoding to ASCII
|
58
|
+
def self.ascii_filter text
|
59
|
+
rv = text
|
60
|
+
|
61
|
+
# Squash some Unicode character categories
|
62
|
+
#
|
63
|
+
# {Zs} necessary to match statement date line
|
64
|
+
# {Pc} necessary to match statement record lines
|
65
|
+
rv.gsub!(/[\p{Zs}\p{Pc}]/, " ")
|
66
|
+
|
67
|
+
# Replace Unicode soft hyphens
|
68
|
+
rv.gsub!(/\u00ad/, "-")
|
69
|
+
|
70
|
+
# Replace... well, who knows just *what* this is...
|
71
|
+
rv.gsub!(/\u0a0c/, " ")
|
72
|
+
|
73
|
+
# Re-encode to ASCII
|
74
|
+
encoding_options = {
|
75
|
+
invalid: :replace, # Replace invalid byte sequences
|
76
|
+
undef: :replace, # Replace anything not defined in ASCII
|
77
|
+
replace: '', # Use a blank for those replacements
|
78
|
+
universal_newline: true # Always break lines with \n
|
79
|
+
}
|
80
|
+
rv = rv.encode(Encoding.find('US-ASCII'), encoding_options)
|
81
|
+
|
82
|
+
# Replace ASCII form feed characters
|
83
|
+
rv.gsub!(/\f/, "\n")
|
84
|
+
|
85
|
+
rv
|
86
|
+
end
|
87
|
+
|
55
88
|
end
|
56
89
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bank_statement_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simon Dawson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-25 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A gem for parsing bank statements
|
14
14
|
email: spdawson@gmail.com
|