ing_kontoauszug_parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +19 -0
- data/LICENSE +21 -0
- data/README.md +159 -0
- data/bin/console +8 -0
- data/bin/pdf_to_json +64 -0
- data/bin/setup +5 -0
- data/lib/ing_kontoauszug_parser/header.rb +146 -0
- data/lib/ing_kontoauszug_parser/pdf_extractor.rb +233 -0
- data/lib/ing_kontoauszug_parser/statement_parser.rb +269 -0
- data/lib/ing_kontoauszug_parser/text_parser.rb +905 -0
- data/lib/ing_kontoauszug_parser/version.rb +14 -0
- data/lib/ing_kontoauszug_parser.rb +105 -0
- metadata +74 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module IngKontoauszugParser
  # The current version of the ing_kontoauszug_parser gem.
  #
  # Follows semantic versioning (major.minor.patch):
  # - Major: Breaking API changes
  # - Minor: New features, backward compatible
  # - Patch: Bug fixes, backward compatible
  #
  # @return [String] the version string
  # @api public
  VERSION = '0.1.0'
end
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'ing_kontoauszug_parser/version'
|
|
4
|
+
require_relative 'ing_kontoauszug_parser/statement_parser'
|
|
5
|
+
|
|
6
|
+
# A Ruby library for parsing ING Bank (Germany) account statement PDFs.
#
# This gem extracts structured transaction data from ING statement PDFs,
# including IBAN validation, SEPA reference extraction, and transaction
# categorization.
#
# == Quick Start
#
#   require 'ing_kontoauszug_parser'
#
#   parser = IngKontoauszugParser::StatementParser.new
#   result = parser.parse(file_path: 'statement.pdf')
#
#   puts "Account: #{result[:header][:iban]}"
#   result[:statements].each do |transaction|
#     puts "#{transaction[:booking_date]}: #{transaction[:recipient]} #{transaction[:amount_eur]}"
#   end
#
# == Key Features
#
# - PDF text extraction with automatic backend selection (poppler or pdf-reader)
# - IBAN extraction and ISO 13616 checksum validation
# - SEPA mandate and reference extraction
# - Google Pay transaction detection
# - Configurable language labels for international statements
#
# == Exception Handling
#
# All library exceptions inherit from {IngKontoauszugParser::Error}, allowing
# you to rescue all parsing failures in one place:
#
#   begin
#     result = parser.parse(file_path: path)
#   rescue IngKontoauszugParser::Error => e
#     logger.error "Failed to parse statement: #{e.message}"
#   end
#
# For more specific handling, rescue individual exception classes:
# - {HeaderNotFound} - IBAN not found in statement
# - {InvalidIBAN} - IBAN checksum validation failed
# - {BookingParseError} - Transaction parsing failed
#
# @see StatementParser The main parsing interface
# @see https://github.com/bigcurl/ing-kontoauszug-parser GitHub repository
module IngKontoauszugParser
  # Base exception class for all parsing-related errors.
  #
  # Rescue this class to catch any error raised by the library.
  # All specific exceptions inherit from this class.
  #
  # @example Catch all library errors
  #   begin
  #     result = parser.parse(file_path: 'statement.pdf')
  #   rescue IngKontoauszugParser::Error => e
  #     puts "Parsing failed: #{e.message}"
  #   end
  class Error < StandardError; end

  # Raised when the IBAN cannot be located in the statement text.
  #
  # This typically indicates that the PDF is not a valid ING statement,
  # the text extraction failed to capture the header, or the statement
  # format has changed.
  #
  # @example Handle missing IBAN
  #   begin
  #     result = parser.parse(file_path: path)
  #   rescue IngKontoauszugParser::HeaderNotFound
  #     puts "Could not find account IBAN - is this an ING statement?"
  #   end
  class HeaderNotFound < Error; end

  # Raised when the extracted IBAN fails ISO 13616 checksum validation.
  #
  # This may indicate a corrupted PDF, OCR errors during text extraction,
  # or an intentionally invalid IBAN. You can disable validation by passing
  # +validate_iban: false+ to the parser constructor.
  #
  # @example Handle invalid IBAN
  #   begin
  #     result = parser.parse(file_path: path)
  #   rescue IngKontoauszugParser::InvalidIBAN => e
  #     puts "IBAN validation failed: #{e.message}"
  #   end
  class InvalidIBAN < Error; end

  # Raised when transaction parsing fails or no transactions are found.
  #
  # This may occur if the statement format is unexpected, the PDF contains
  # no transactions (empty statement), or the text extraction produced
  # malformed output.
  #
  # @example Handle parsing failure
  #   begin
  #     result = parser.parse(file_path: path)
  #   rescue IngKontoauszugParser::BookingParseError => e
  #     puts "Could not parse transactions: #{e.message}"
  #   end
  class BookingParseError < Error; end
end
|
metadata
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
name: ing_kontoauszug_parser
version: !ruby/object:Gem::Version
  version: 0.1.0
platform: ruby
authors:
- bigcurl
bindir: bin
cert_chain: []
date: 1980-01-02 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: pdf-reader
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '2.12'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '2.12'
description: A toolkit for ingesting and parsing statements from ING Bank.
email:
- bigcurl@users.noreply.github.com
# NOTE(review): console and setup are listed next to pdf_to_json, so all three
# are declared as gem executables. The first two look like development helper
# scripts - confirm in the gemspec that exposing them is intended.
executables:
- console
- pdf_to_json
- setup
extensions: []
extra_rdoc_files: []
files:
- CHANGELOG.md
- LICENSE
- README.md
- bin/console
- bin/pdf_to_json
- bin/setup
- lib/ing_kontoauszug_parser.rb
- lib/ing_kontoauszug_parser/header.rb
- lib/ing_kontoauszug_parser/pdf_extractor.rb
- lib/ing_kontoauszug_parser/statement_parser.rb
- lib/ing_kontoauszug_parser/text_parser.rb
- lib/ing_kontoauszug_parser/version.rb
homepage: https://github.com/bigcurl/ing-kontoauszug-parser
licenses:
- MIT
metadata:
  allowed_push_host: https://rubygems.org
  homepage_uri: https://github.com/bigcurl/ing-kontoauszug-parser
  source_code_uri: https://github.com/bigcurl/ing-kontoauszug-parser
  changelog_uri: https://github.com/bigcurl/ing-kontoauszug-parser/blob/main/CHANGELOG.md
  rubygems_mfa_required: 'true'
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: 3.0.0
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubygems_version: 4.0.4
specification_version: 4
summary: Parse statements from ING Bank
test_files: []
|