bank_statement_parser 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b356589f88c707aba064400a8b0a1b3a8f34c3f1
4
- data.tar.gz: 4e26f65ffcb297fbdcb2d9d92ae2346b7fd203d0
3
+ metadata.gz: 63f514f97e4ea127e8f5f207466ae59371eefc23
4
+ data.tar.gz: d09dbf8a966d8def416d9597db28e0949a02c106
5
5
  SHA512:
6
- metadata.gz: 65450fbca03a16b5a0a569366c6cb644c53bff164acbc0cd816a4e6780fe2e3904971798d533b47dd9046bdb06263375b1baf0518d6c4e122a2dd5a6cdbbf099
7
- data.tar.gz: 704f2be78c57c41c0b62d8af54114ca42c2af151323e4c4539a72838fa0c0f6e27111482b2e5cd2836281966a19fd3308fdbcc446efc8c8a560e4b21c7bef290
6
+ metadata.gz: 00ce8ead80e9c2da7ea606f46e1d3ea983e4a7e0d950b36e3774db3f394ebcfae83bff3a60d60619b270954c2e3c447b225ec5579fb726661de312a846ad7fe5
7
+ data.tar.gz: bafa6d5ada99cc93a2e4d646158c244f15d0dfee6e214ce25a57f3de602531100257a8873261d19ae6c6df505d07dd93341558512fb3c5e42b64a3fcc5cea441
@@ -23,8 +23,8 @@ module BankStatementParser
23
23
 
24
24
  reset
25
25
 
26
- # Grab the full text file content
27
- full_text = File.read(path)
26
+ # Grab the full text file content, and re-encode to ASCII
27
+ full_text = ascii_filter(File.read(path))
28
28
 
29
29
  # Process each line in turn
30
30
  full_text.split("\n").each do |line|
@@ -52,5 +52,38 @@ module BankStatementParser
52
52
  @records = []
53
53
  end
54
54
 
55
+ private
56
+
57
+ # Filter the specified text, re-encoding to ASCII
58
+ def self.ascii_filter text
59
+ rv = text
60
+
61
+ # Squash some Unicode character categories
62
+ #
63
+ # {Zs} necessary to match statement date line
64
+ # {Pc} necessary to match statement record lines
65
+ rv.gsub!(/[\p{Zs}\p{Pc}]/, " ")
66
+
67
+ # Replace Unicode soft hyphens
68
+ rv.gsub!(/\u00ad/, "-")
69
+
70
+ # Replace... well, who knows just *what* this is...
71
+ rv.gsub!(/\u0a0c/, " ")
72
+
73
+ # Re-encode to ASCII
74
+ encoding_options = {
75
+ invalid: :replace, # Replace invalid byte sequences
76
+ undef: :replace, # Replace anything not defined in ASCII
77
+ replace: '', # Use a blank for those replacements
78
+ universal_newline: true # Always break lines with \n
79
+ }
80
+ rv = rv.encode(Encoding.find('US-ASCII'), encoding_options)
81
+
82
+ # Replace ASCII form feed characters
83
+ rv.gsub!(/\f/, "\n")
84
+
85
+ rv
86
+ end
87
+
55
88
  end
56
89
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bank_statement_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simon Dawson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-24 00:00:00.000000000 Z
11
+ date: 2015-03-25 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A gem for parsing bank statements
14
14
  email: spdawson@gmail.com