find-subscriptions 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +165 -9
- data/lib/find_subscriptions/cli.rb +71 -24
- data/lib/find_subscriptions/config_loader.rb +90 -0
- data/lib/find_subscriptions/help_text.rb +102 -0
- data/lib/find_subscriptions/user_schema_builder.rb +82 -0
- data/lib/find_subscriptions.rb +26 -1
- metadata +5 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 11a3975380aece18b36723e9187c87a24aedcc775a2dd8a196cd165c6375ee41
|
|
4
|
+
data.tar.gz: 20e28d7a29bfeba088043b181814475202d1b4a678b7347838fe7312c024be32
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: cbbb7b9f12188c6201795d7a28b2f02f21ef3125032cc1878b7c63eb97c0459bd37a1d6449d25c2fde3de990150c92343a4fb997c223594e39964bafaf2c80b4
|
|
7
|
+
data.tar.gz: a8a53c131e70abbb2018c7d0b8698b7d4fd07471229e06d2c631b584dd22cb5387a08380ec2e37c3cc6425b631fa82c3e59d2d31a686e0954e35b3057238ce83
|
data/README.md
CHANGED
|
@@ -6,10 +6,40 @@ Scans bank and credit card CSV exports to surface recurring charges — subscrip
|
|
|
6
6
|
|
|
7
7
|
- Ruby 3.x
|
|
8
8
|
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
gem install find-subscriptions
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
Or clone the repo and run directly:
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
git clone https://github.com/jeffreybaird/find-subscriptions
|
|
19
|
+
cd find-subscriptions
|
|
20
|
+
bundle install
|
|
21
|
+
./bin/find-subscriptions --files EXPORT.csv
|
|
22
|
+
```
|
|
23
|
+
|
|
9
24
|
## Usage
|
|
10
25
|
|
|
11
26
|
```
|
|
12
|
-
|
|
27
|
+
find-subscriptions --files EXPORT.csv [options]
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Run with no arguments (or `--help`) to see the full help menu:
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
find-subscriptions
|
|
34
|
+
find-subscriptions --help
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Get detailed help for a specific flag:
|
|
38
|
+
|
|
39
|
+
```
|
|
40
|
+
find-subscriptions --sort --help
|
|
41
|
+
find-subscriptions --format --help
|
|
42
|
+
find-subscriptions --set-config --help
|
|
13
43
|
```
|
|
14
44
|
|
|
15
45
|
### Options
|
|
@@ -25,6 +55,7 @@ Scans bank and credit card CSV exports to surface recurring charges — subscrip
|
|
|
25
55
|
| `--from DATE` | Only include transactions on or after DATE (`YYYY-MM-DD`) |
|
|
26
56
|
| `--to DATE` | Only include transactions on or before DATE (`YYYY-MM-DD`) |
|
|
27
57
|
| `--format FORMAT` | Output format: `text` (default), `json`, `csv` |
|
|
58
|
+
| `--set-config PATH` | Register a config YAML file to use as defaults on future runs |
|
|
28
59
|
|
|
29
60
|
### Output formats
|
|
30
61
|
|
|
@@ -35,8 +66,8 @@ Scans bank and credit card CSV exports to surface recurring charges — subscrip
|
|
|
35
66
|
| `csv` | CSV with header row — open in a spreadsheet or feed into other scripts |
|
|
36
67
|
|
|
37
68
|
```
|
|
38
|
-
|
|
39
|
-
|
|
69
|
+
find-subscriptions --files export.csv --format json
|
|
70
|
+
find-subscriptions --files export.csv --format csv > subscriptions.csv
|
|
40
71
|
```
|
|
41
72
|
|
|
42
73
|
### Sort orders
|
|
@@ -67,29 +98,150 @@ Subscriptions whose last transaction is older than the duration are hidden. Usef
|
|
|
67
98
|
Scan a single Amex export, auto-detecting the schema:
|
|
68
99
|
|
|
69
100
|
```
|
|
70
|
-
|
|
101
|
+
find-subscriptions --files Amex-2025.csv
|
|
71
102
|
```
|
|
72
103
|
|
|
73
104
|
Scan multiple files and force a schema:
|
|
74
105
|
|
|
75
106
|
```
|
|
76
|
-
|
|
107
|
+
find-subscriptions --files jan.csv,feb.csv --schema american_express
|
|
77
108
|
```
|
|
78
109
|
|
|
79
110
|
Filter out known/expected subscriptions and show only recent ones:
|
|
80
111
|
|
|
81
112
|
```
|
|
82
|
-
|
|
113
|
+
find-subscriptions --files Amex-2025.csv \
|
|
83
114
|
--known-payees data/known_payees.yml \
|
|
84
115
|
--inactive-for 6months \
|
|
85
116
|
--sort last_desc
|
|
86
117
|
```
|
|
87
118
|
|
|
88
|
-
##
|
|
119
|
+
## Config file
|
|
120
|
+
|
|
121
|
+
Save your preferred defaults so you don't have to repeat flags every run.
|
|
122
|
+
|
|
123
|
+
**1. Create a YAML config file:**
|
|
124
|
+
|
|
125
|
+
```yaml
|
|
126
|
+
# ~/.find-subscriptions.yml
|
|
127
|
+
sort: last_desc
|
|
128
|
+
format: text
|
|
129
|
+
min_amount: "5.00"
|
|
130
|
+
inactive_for: 6months
|
|
131
|
+
filter_known_payees: true
|
|
132
|
+
known_payees_path: ~/.config/find-subscriptions/known_payees.yml
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
**2. Register it:**
|
|
136
|
+
|
|
137
|
+
```
|
|
138
|
+
find-subscriptions --set-config ~/.find-subscriptions.yml
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
This writes the path to `~/.find-subscriptions-config-path`. On every subsequent run, options from that file are loaded as defaults before any CLI flags are applied.
|
|
142
|
+
|
|
143
|
+
**Option precedence (lowest to highest):**
|
|
144
|
+
|
|
145
|
+
```
|
|
146
|
+
built-in defaults → config file → CLI flags
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
CLI flags always win. The config file fills in anything you don't pass on the command line.
|
|
150
|
+
|
|
151
|
+
**Supported config keys:**
|
|
152
|
+
|
|
153
|
+
| Key | Type | Description |
|
|
154
|
+
|-----|------|-------------|
|
|
155
|
+
| `sort` | string | Default sort order |
|
|
156
|
+
| `format` | string | Default output format |
|
|
157
|
+
| `min_amount` | string | Minimum recurring amount to show |
|
|
158
|
+
| `inactive_for` | string | Hide subscriptions older than this duration |
|
|
159
|
+
| `filter_known_payees` | boolean | Apply `known_payees_path` filter by default |
|
|
160
|
+
| `known_payees_path` | path | Default known-payees YAML path |
|
|
161
|
+
| `schema` | string | Default schema name |
|
|
162
|
+
| `from_date` | string (`YYYY-MM-DD`) | Default `--from` date |
|
|
163
|
+
| `to_date` | string (`YYYY-MM-DD`) | Default `--to` date |
|
|
164
|
+
| `schemas` | mapping | User-defined CSV schemas (see below) |
|
|
165
|
+
|
|
166
|
+
> `files` is intentionally not supported in the config — you must provide it on the command line each run.
|
|
167
|
+
|
|
168
|
+
## User-defined schemas
|
|
169
|
+
|
|
170
|
+
Define custom CSV schemas in your config file without writing any Ruby code. This lets you analyze exports from banks not built into the tool.
|
|
171
|
+
|
|
172
|
+
Add a top-level `schemas:` key to your config YAML:
|
|
173
|
+
|
|
174
|
+
```yaml
|
|
175
|
+
schemas:
|
|
176
|
+
my_bank:
|
|
177
|
+
required_headers:
|
|
178
|
+
- Date
|
|
179
|
+
- Description
|
|
180
|
+
- Amount
|
|
181
|
+
amount_key: Amount
|
|
182
|
+
direction: negative_debit
|
|
183
|
+
date_column: Date
|
|
184
|
+
date_format: "%Y-%m-%d"
|
|
185
|
+
payee_column: Description
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
Then use it by name:
|
|
189
|
+
|
|
190
|
+
```
|
|
191
|
+
find-subscriptions --files my_bank_export.csv --schema my_bank
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
Or let it auto-detect (your schema is checked after the built-ins, so unique headers help):
|
|
195
|
+
|
|
196
|
+
```
|
|
197
|
+
find-subscriptions --files my_bank_export.csv
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
### Schema config keys
|
|
201
|
+
|
|
202
|
+
| Key | Required | Description |
|
|
203
|
+
|-----|----------|-------------|
|
|
204
|
+
| `required_headers` | yes | Array of CSV column names that must be present |
|
|
205
|
+
| `amount_key` | yes | Column name containing the transaction amount |
|
|
206
|
+
| `direction` | yes | How to determine if a transaction is outgoing (see below) |
|
|
207
|
+
| `date_column` | yes | Column name containing the transaction date |
|
|
208
|
+
| `date_format` | yes | `strptime`-compatible format string (e.g. `%Y-%m-%d`, `%m/%d/%Y`) |
|
|
209
|
+
| `payee_column` | yes | Column name containing the payee/description |
|
|
210
|
+
| `indicator_column` | if `direction: indicator_column` | Column holding debit/credit labels |
|
|
211
|
+
| `debit_value` | no | Value in `indicator_column` that means "debit" (default: `Debit`) |
|
|
212
|
+
|
|
213
|
+
### Direction strategies
|
|
214
|
+
|
|
215
|
+
| Strategy | When to use |
|
|
216
|
+
|----------|-------------|
|
|
217
|
+
| `negative_debit` | Outgoing charges are **negative** numbers (most generic bank CSVs) |
|
|
218
|
+
| `positive_debit` | Outgoing charges are **positive** numbers (e.g. American Express) |
|
|
219
|
+
| `indicator_column` | A separate column labels rows as `Debit` or `Credit` (e.g. Navy Federal) |
|
|
220
|
+
|
|
221
|
+
**`indicator_column` example:**
|
|
222
|
+
|
|
223
|
+
```yaml
|
|
224
|
+
schemas:
|
|
225
|
+
regional_bank:
|
|
226
|
+
required_headers:
|
|
227
|
+
- Posted Date
|
|
228
|
+
- Description
|
|
229
|
+
- Amount
|
|
230
|
+
- Transaction Type
|
|
231
|
+
amount_key: Amount
|
|
232
|
+
direction: indicator_column
|
|
233
|
+
indicator_column: Transaction Type
|
|
234
|
+
debit_value: Debit
|
|
235
|
+
date_column: Posted Date
|
|
236
|
+
date_format: "%m/%d/%Y"
|
|
237
|
+
payee_column: Description
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
## Supported built-in schemas
|
|
89
241
|
|
|
90
242
|
| Name | Bank / Issuer | Required CSV headers |
|
|
91
243
|
|------|---------------|----------------------|
|
|
92
|
-
| `american_express` | American Express | `Date`, `Description`, `Amount` |
|
|
244
|
+
| `american_express` | American Express | `Date`, `Description`, `Amount`, `Card Member` |
|
|
93
245
|
| `navy_federal` | Navy Federal Credit Union | `Transaction Date`, `Description`, `Amount`, `Credit Debit Indicator` |
|
|
94
246
|
| `generic` | Generic (YYYY-MM-DD dates) | `Date`, `Description`, `Amount` |
|
|
95
247
|
|
|
@@ -117,7 +269,7 @@ Each entry requires:
|
|
|
117
269
|
- `normalized` — internal deduplication key (lowercase, used for grouping)
|
|
118
270
|
- `patterns` — list of Ruby regex literals in `/pattern/flags` format
|
|
119
271
|
|
|
120
|
-
`data/known_payees.yml` is the default file and is always loaded for payee normalization (display names). Filtering only applies when `--known-payees` is explicitly passed.
|
|
272
|
+
`data/known_payees.yml` is the default file and is always loaded for payee normalization (display names). Filtering only applies when `--known-payees` is explicitly passed (or `filter_known_payees: true` is set in the config).
|
|
121
273
|
|
|
122
274
|
## Output format
|
|
123
275
|
|
|
@@ -126,3 +278,7 @@ Subscriptions:
|
|
|
126
278
|
- SPOTIFY : $9.99 since January 2025 (14 transactions) until February 2026
|
|
127
279
|
- NETFLIX.COM : $15.49 since March 2024 (12 transactions) until February 2026
|
|
128
280
|
```
|
|
281
|
+
|
|
282
|
+
## License
|
|
283
|
+
|
|
284
|
+
PolyForm Noncommercial License 1.0.0 — free to use and fork for personal and open-source projects. Commercial use prohibited. See [LICENSE](LICENSE).
|
|
@@ -7,6 +7,9 @@ require 'yaml'
|
|
|
7
7
|
require 'bigdecimal'
|
|
8
8
|
require 'date'
|
|
9
9
|
|
|
10
|
+
require_relative 'help_text'
|
|
11
|
+
require_relative 'config_loader'
|
|
12
|
+
require_relative 'user_schema_builder'
|
|
10
13
|
require_relative 'transaction'
|
|
11
14
|
require_relative 'schema_registry'
|
|
12
15
|
require_relative '../schemas/generic'
|
|
@@ -38,12 +41,12 @@ module FindSubscriptions
|
|
|
38
41
|
end
|
|
39
42
|
|
|
40
43
|
def run(argv)
|
|
41
|
-
options = parse_options(argv)
|
|
44
|
+
options, user_schemas = parse_options(argv)
|
|
42
45
|
|
|
43
46
|
files = options.fetch(:files)
|
|
44
47
|
raise ArgumentError, 'No files provided' if files.empty?
|
|
45
48
|
|
|
46
|
-
registry = build_registry
|
|
49
|
+
registry = build_registry(user_schemas)
|
|
47
50
|
transactions = load_transactions(files, registry, options[:schema])
|
|
48
51
|
transactions = filter_by_date_range(transactions, options[:from_date], options[:to_date])
|
|
49
52
|
payee_normalizer = PayeeNormalizer.from_yaml(options[:known_payees_path])
|
|
@@ -85,7 +88,7 @@ module FindSubscriptions
|
|
|
85
88
|
|
|
86
89
|
def inactive_cutoff(count, unit, today)
|
|
87
90
|
case unit
|
|
88
|
-
when 'year' then today << (count * 12)
|
|
91
|
+
when 'year' then today << (count * 12) # Date#<< subtracts months
|
|
89
92
|
when 'month' then today << count
|
|
90
93
|
when 'week' then today - (count * 7)
|
|
91
94
|
end
|
|
@@ -138,10 +141,39 @@ module FindSubscriptions
|
|
|
138
141
|
format('%.2f', decimal.to_f)
|
|
139
142
|
end
|
|
140
143
|
|
|
144
|
+
# Three-layer precedence: built-in defaults → config file → CLI flags.
|
|
145
|
+
# Returns [options_hash, user_schemas_hash].
|
|
141
146
|
def parse_options(argv)
|
|
142
|
-
|
|
147
|
+
handle_help(argv)
|
|
148
|
+
config = ConfigLoader.load
|
|
149
|
+
options = default_options.merge(config.options)
|
|
143
150
|
define_option_parser(options).parse!(argv)
|
|
144
|
-
options
|
|
151
|
+
[options, config.schemas]
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
# Decides whether this invocation is a request for help.
#
# An empty argv prints the full help menu; an argv containing --help or -h
# prints targeted help (or the full menu). Both help paths terminate the
# process, so this only returns when normal option parsing should continue.
def handle_help(argv)
  if argv.empty?
    print_full_help
  elsif %w[--help -h].any? { |help_flag| argv.include?(help_flag) }
    print_targeted_or_full_help(argv)
  end
end
|
|
160
|
+
|
|
161
|
+
# Prints help for the first long flag that accompanies --help, then exits 0.
# Falls back to the full help menu when no other long flag was given.
def print_targeted_or_full_help(argv)
  requested = other_long_flags(argv).first
  return print_full_help if requested.nil?

  # Unknown flags still get a friendly message rather than an error.
  message = HelpFormatter.flag_help(requested)
  message ||= "No help available for --#{requested}."
  puts message
  exit 0
end
|
|
168
|
+
|
|
169
|
+
# Returns the names (without the leading "--") of every long flag in argv
# other than --help, in the order given.
#
# Any "=value" suffix is dropped so that `--sort=last_desc --help` resolves to
# the "sort" help entry, and a bare "--" separator (or "--=x") is ignored
# because it does not name a flag.
def other_long_flags(argv)
  argv.filter_map do |arg|
    next unless arg.start_with?('--') && arg != '--help'

    name = arg.delete_prefix('--').split('=', 2).first
    name unless name.nil? || name.empty?
  end
end
|
|
173
|
+
|
|
174
|
+
# Writes the complete help menu to standard output and exits successfully.
def print_full_help
  help_output = HelpFormatter.full_help
  puts(help_output)
  exit(0)
end
|
|
146
178
|
|
|
147
179
|
DEFAULT_KNOWN_PAYEES_PATH = File.expand_path('../../data/known_payees.yml', __dir__).freeze
|
|
@@ -156,28 +188,33 @@ module FindSubscriptions
|
|
|
156
188
|
}
|
|
157
189
|
end
|
|
158
190
|
|
|
159
|
-
def define_option_parser(options)
|
|
191
|
+
def define_option_parser(options)
|
|
160
192
|
OptionParser.new do |opt|
|
|
161
193
|
opt.banner = 'Usage: find-subscriptions --files a.csv,b.csv [--schema NAME]'
|
|
162
|
-
opt
|
|
163
|
-
options[:files] = val.split(',').map(&:strip).reject(&:empty?)
|
|
164
|
-
end
|
|
165
|
-
opt.on('--schema NAME', 'Force schema name (otherwise auto-detect)') do |val|
|
|
166
|
-
options[:schema] = val.strip
|
|
167
|
-
end
|
|
168
|
-
opt.on('--known-payees PATH', 'Known payees YAML; matched payees are filtered from output') do |val|
|
|
169
|
-
options[:known_payees_path] = val
|
|
170
|
-
options[:filter_known_payees] = true
|
|
171
|
-
end
|
|
172
|
-
opt.on('--inactive-for DURATION',
|
|
173
|
-
'Hide subscriptions with no transactions in DURATION (e.g. 6months, 1year, 3weeks)') do |val|
|
|
174
|
-
options[:inactive_for] = val.strip
|
|
175
|
-
end
|
|
176
|
-
opt.on('--min-amount AMOUNT', 'Hide subscriptions with a recurring charge below AMOUNT') do |val|
|
|
177
|
-
options[:min_amount] = val.strip
|
|
178
|
-
end
|
|
194
|
+
register_filter_options(opt, options)
|
|
179
195
|
register_date_range_options(opt, options)
|
|
180
196
|
register_presentation_options(opt, options)
|
|
197
|
+
register_config_options(opt)
|
|
198
|
+
end
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
# Registers the input-selection and filtering flags on the option parser.
# Each handler stores its (whitespace-trimmed) value into the shared options
# hash; --known-payees additionally switches known-payee filtering on.
def register_filter_options(opt, options) # rubocop:disable Metrics/MethodLength
  opt.on('--files FILES', 'Comma-separated list of CSV files') do |csv_list|
    paths = csv_list.split(',').map(&:strip)
    options[:files] = paths.reject(&:empty?)
  end
  opt.on('--schema NAME', 'Force schema name (otherwise auto-detect)') { |schema_name| options[:schema] = schema_name.strip }
  opt.on('--known-payees PATH', 'Known payees YAML; matched payees are filtered from output') do |payees_path|
    options[:known_payees_path] = payees_path
    options[:filter_known_payees] = true
  end
  opt.on(
    '--inactive-for DURATION',
    'Hide subscriptions with no transactions in DURATION (e.g. 6months, 1year, 3weeks)'
  ) { |duration| options[:inactive_for] = duration.strip }
  opt.on('--min-amount AMOUNT', 'Hide subscriptions with a recurring charge below AMOUNT') do |amount|
    options[:min_amount] = amount.strip
  end
end
|
|
183
220
|
|
|
@@ -199,11 +236,21 @@ module FindSubscriptions
|
|
|
199
236
|
end
|
|
200
237
|
end
|
|
201
238
|
|
|
202
|
-
def
|
|
239
|
+
# Registers --set-config. The handler records the given config path via
# ConfigLoader and then exits 0, so no analysis runs in the same invocation.
def register_config_options(opt)
  opt.on('--set-config PATH', 'Register a config YAML file for future runs') do |config_path|
    ConfigLoader.set_config_path(config_path)
    exit(0)
  end
end
|
|
245
|
+
|
|
246
|
+
# Built-in schemas are registered first so they take precedence during
|
|
247
|
+
# auto-detection. User schemas are appended after and can be forced via --schema.
|
|
248
|
+
def build_registry(user_schemas = {})
  # user_schemas maps a schema name to its raw config hash. nil is treated as
  # "no user schemas" because a config file with an empty `schemas:` key
  # parses to nil rather than to an empty mapping.
  registry = SchemaRegistry.new
  registry.register('american_express', Schemas.american_express)
  registry.register('navy_federal', Schemas.navy_federal)
  registry.register('generic', Schemas.generic)
  (user_schemas || {}).each do |name, cfg|
    # YAML keys may parse as non-strings (e.g. a numeric name); --schema always
    # supplies a String, so register under the string form.
    schema_name = name.to_s
    registry.register(schema_name, UserSchemaBuilder.build(schema_name, cfg))
  end
  registry
end
|
|
209
256
|
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'yaml'
|
|
4
|
+
require 'date'
|
|
5
|
+
|
|
6
|
+
module FindSubscriptions
|
|
7
|
+
# Loads user configuration from a YAML file registered via --set-config.
|
|
8
|
+
#
|
|
9
|
+
# The pointer file (~/.find-subscriptions-config-path) stores the path to the
|
|
10
|
+
# actual config YAML. ConfigLoader.load returns a Result with parsed options
|
|
11
|
+
# and any user-defined schema definitions.
|
|
12
|
+
class ConfigLoader
  CONFIG_POINTER_PATH = File.expand_path('~/.find-subscriptions-config-path').freeze

  # Returned by ConfigLoader.load; carries parsed options and schema defs.
  Result = Struct.new(:options, :schemas)

  # Config YAML keys grouped by how they should be parsed.
  # Amounts stay as strings so BigDecimal conversion happens at the filter site.
  STRING_KEYS = %w[sort format schema inactive_for].freeze
  BOOL_KEYS = %w[filter_known_payees].freeze
  PATH_KEYS = %w[known_payees_path].freeze # expanded via File.expand_path
  DATE_KEYS = %w[from_date to_date].freeze # parsed to Date objects
  AMOUNT_KEYS = %w[min_amount].freeze # kept as strings

  # Loads the registered config, if any.
  #
  # Returns an empty Result when the pointer file, or the config file it names,
  # is not a regular file. Keys whose value is blank/nil in the YAML are
  # skipped so they do not clobber built-in defaults.
  #
  # @param pointer_path [String] file holding the path of the config YAML
  # @return [Result] options keyed by Symbol, and the raw `schemas` mapping
  # @raise [ArgumentError] if the YAML root, or its `schemas` value, is not a
  #   mapping, or if a date value cannot be parsed
  def self.load(pointer_path: CONFIG_POINTER_PATH)
    return Result.new({}, {}) unless File.file?(pointer_path)

    config_path = File.read(pointer_path).strip
    return Result.new({}, {}) unless File.file?(config_path)

    # Unquoted YAML dates (from_date: 2025-01-01) are parsed as Date objects,
    # which safe loading rejects unless Date is explicitly permitted.
    raw = YAML.safe_load_file(config_path, permitted_classes: [Date]) || {}
    raise ArgumentError, "Config file must contain a YAML mapping: #{config_path}" unless raw.is_a?(Hash)

    Result.new(parse_options(raw), extract_schemas(raw, config_path))
  end

  # Records +path+ (expanded to an absolute path) in the pointer file so later
  # runs load it, and prints a confirmation line.
  #
  # @raise [ArgumentError] if +path+ does not name an existing regular file
  def self.set_config_path(path, pointer_path: CONFIG_POINTER_PATH)
    expanded = File.expand_path(path)
    raise ArgumentError, "Config file not found: #{expanded}" unless File.file?(expanded)

    File.write(pointer_path, expanded)
    puts "Config registered: #{expanded}"
  end

  def self.parse_options(raw)
    options = {}
    extract_strings(raw, options)
    extract_bools(raw, options)
    extract_paths(raw, options)
    extract_dates(raw, options)
    extract_amounts(raw, options)
    options
  end
  private_class_method :parse_options

  # An empty `schemas:` key parses to nil; treat that as "no user schemas".
  def self.extract_schemas(raw, config_path)
    schemas = raw['schemas'] || {}
    return schemas if schemas.is_a?(Hash)

    raise ArgumentError, "'schemas' must be a mapping of schema names to settings: #{config_path}"
  end
  private_class_method :extract_schemas

  # Yields [option_symbol, value] for each of +keys+ that has a non-nil value.
  def self.each_present(raw, keys)
    keys.each do |key|
      value = raw[key]
      yield key.to_sym, value unless value.nil?
    end
  end
  private_class_method :each_present

  def self.extract_strings(raw, options)
    each_present(raw, STRING_KEYS) { |name, value| options[name] = value.to_s }
  end
  private_class_method :extract_strings

  def self.extract_bools(raw, options)
    # Stored as given; an explicit `false` is kept (only nil is skipped).
    each_present(raw, BOOL_KEYS) { |name, value| options[name] = value }
  end
  private_class_method :extract_bools

  def self.extract_paths(raw, options)
    each_present(raw, PATH_KEYS) { |name, value| options[name] = File.expand_path(value.to_s) }
  end
  private_class_method :extract_paths

  def self.extract_dates(raw, options)
    each_present(raw, DATE_KEYS) do |name, value|
      # Already a Date when the YAML value was unquoted; a String when quoted.
      options[name] = value.is_a?(Date) ? value : Date.parse(value.to_s)
    end
  end
  private_class_method :extract_dates

  def self.extract_amounts(raw, options)
    each_present(raw, AMOUNT_KEYS) { |name, value| options[name] = value.to_s }
  end
  private_class_method :extract_amounts
end
|
|
90
|
+
end
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FindSubscriptions
|
|
4
|
+
# Detailed per-flag help strings, keyed by flag name (without leading --).
|
|
5
|
+
# Used by HelpFormatter to render full and targeted help output.
|
|
6
|
+
module HelpText
|
|
7
|
+
DESCRIPTIONS = {
|
|
8
|
+
'files' => <<~DESC.strip,
|
|
9
|
+
--files FILES
|
|
10
|
+
One or more CSV files to analyze, separated by commas.
|
|
11
|
+
Required. At least one file must be provided.
|
|
12
|
+
Example: --files transactions.csv,checking.csv
|
|
13
|
+
DESC
|
|
14
|
+
'schema' => <<~DESC.strip,
|
|
15
|
+
--schema NAME
|
|
16
|
+
Force a specific schema for parsing CSV files. If omitted, the
|
|
17
|
+
schema is auto-detected from the CSV headers.
|
|
18
|
+
Built-in schemas: american_express, navy_federal, generic
|
|
19
|
+
User-defined schemas can be added via a config file (see --set-config).
|
|
20
|
+
Example: --schema american_express
|
|
21
|
+
DESC
|
|
22
|
+
'known-payees' => <<~DESC.strip,
|
|
23
|
+
--known-payees PATH
|
|
24
|
+
Path to a YAML file listing payees to exclude from output. Useful
|
|
25
|
+
for ignoring loan payments, rent, or other expected recurring charges.
|
|
26
|
+
Example: --known-payees ~/.config/known_payees.yml
|
|
27
|
+
DESC
|
|
28
|
+
'inactive-for' => <<~DESC.strip,
|
|
29
|
+
--inactive-for DURATION
|
|
30
|
+
Hide subscriptions with no transactions within the given duration.
|
|
31
|
+
Format: NUMBER(year|month|week)[s]
|
|
32
|
+
Example: --inactive-for 6months, --inactive-for 1year, --inactive-for 3weeks
|
|
33
|
+
DESC
|
|
34
|
+
'min-amount' => <<~DESC.strip,
|
|
35
|
+
--min-amount AMOUNT
|
|
36
|
+
Hide subscriptions with a recurring charge below AMOUNT.
|
|
37
|
+
Useful for filtering out low-value noise transactions.
|
|
38
|
+
Example: --min-amount 5.00
|
|
39
|
+
DESC
|
|
40
|
+
'from' => <<~DESC.strip,
|
|
41
|
+
--from DATE
|
|
42
|
+
Only include transactions on or after DATE (format: YYYY-MM-DD).
|
|
43
|
+
Filters the transaction window before detecting subscriptions.
|
|
44
|
+
Example: --from 2025-01-01
|
|
45
|
+
DESC
|
|
46
|
+
'to' => <<~DESC.strip,
|
|
47
|
+
--to DATE
|
|
48
|
+
Only include transactions on or before DATE (format: YYYY-MM-DD).
|
|
49
|
+
Filters the transaction window before detecting subscriptions.
|
|
50
|
+
Example: --to 2025-12-31
|
|
51
|
+
DESC
|
|
52
|
+
'sort' => <<~DESC.strip,
|
|
53
|
+
--sort ORDER
|
|
54
|
+
Sort the output subscriptions by the given order. Default: first_desc
|
|
55
|
+
Valid orders:
|
|
56
|
+
count_asc — fewest occurrences first
|
|
57
|
+
count_desc — most occurrences first
|
|
58
|
+
first_asc — earliest first-seen date first
|
|
59
|
+
first_desc — most recently first-seen first (default)
|
|
60
|
+
last_asc — earliest last-seen date first
|
|
61
|
+
last_desc — most recently last-seen first
|
|
62
|
+
Example: --sort last_desc
|
|
63
|
+
DESC
|
|
64
|
+
'format' => <<~DESC.strip,
|
|
65
|
+
--format FORMAT
|
|
66
|
+
Output format for the results. Default: text
|
|
67
|
+
Valid formats:
|
|
68
|
+
text — human-readable table (default)
|
|
69
|
+
json — JSON array of subscription objects
|
|
70
|
+
csv — comma-separated values with headers
|
|
71
|
+
Example: --format json
|
|
72
|
+
DESC
|
|
73
|
+
'set-config' => <<~DESC.strip
|
|
74
|
+
--set-config PATH
|
|
75
|
+
Register a YAML config file for future runs. Options in the config
|
|
76
|
+
file serve as defaults that CLI flags can override.
|
|
77
|
+
Supported keys: sort, format, known_payees_path, min_amount,
|
|
78
|
+
from_date, to_date, filter_known_payees, schema, inactive_for.
|
|
79
|
+
Add user-defined schemas under a top-level 'schemas:' key.
|
|
80
|
+
Example: --set-config ~/.find-subscriptions.yml
|
|
81
|
+
DESC
|
|
82
|
+
}.freeze
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
# Formats help output for the CLI: full help and per-flag targeted help.
|
|
86
|
+
module HelpFormatter
  BANNER = <<~BANNER
    find-subscriptions — detect recurring charges in your bank/credit card exports

    Usage: find-subscriptions --files a.csv,b.csv [OPTIONS]
  BANNER

  # Returns the banner followed by every flag description, separated by blank
  # lines, in the order the descriptions are declared.
  def self.full_help
    "#{BANNER}\n#{HelpText::DESCRIPTIONS.values.join("\n\n")}\n"
  end

  # Returns the detailed help text for one flag, or nil when there is none.
  #
  # Accepts the bare name ("sort") as well as the forms a user may type on the
  # command line: a leading "--" and/or a trailing "=value" are ignored, so
  # "--sort" and "sort=last_desc" both resolve to the "sort" entry.
  #
  # @param flag_name [String, #to_s]
  # @return [String, nil]
  def self.flag_help(flag_name)
    key = flag_name.to_s.sub(/\A--/, '').split('=', 2).first
    HelpText::DESCRIPTIONS[key]
  end
end
|
|
102
|
+
end
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'date'
|
|
4
|
+
require_relative 'schema_registry'
|
|
5
|
+
require_relative 'transaction'
|
|
6
|
+
|
|
7
|
+
module FindSubscriptions
|
|
8
|
+
# Builds a CsvSchema from a user-provided YAML configuration hash.
|
|
9
|
+
#
|
|
10
|
+
# Supported direction strategies:
|
|
11
|
+
# positive_debit — positive amounts are outgoing (e.g. AmEx)
|
|
12
|
+
# negative_debit — negative amounts are outgoing (e.g. generic bank export)
|
|
13
|
+
# indicator_column — a separate column holds "Debit"/"Credit" labels
|
|
14
|
+
class UserSchemaBuilder
  BASE_REQUIRED_KEYS = %w[required_headers amount_key direction date_column date_format payee_column].freeze

  # Simple direction lambdas: amount sign alone determines debit/credit.
  # indicator_column is handled separately since it needs config context.
  DIRECTION_STRATEGIES = {
    'positive_debit' => ->(_row, amount) { amount.positive? ? :debit : :credit },
    'negative_debit' => ->(_row, amount) { amount.negative? ? :debit : :credit }
  }.freeze

  # Builds a CsvSchema from one entry of the config's `schemas:` mapping.
  #
  # @param name [String] schema name, used only in error messages
  # @param config [Hash] string-keyed schema settings
  # @raise [ArgumentError] if config is not a mapping, a required key is
  #   missing or blank, the direction is unknown, or direction is
  #   indicator_column without an indicator_column key
  def self.build(name, config)
    validate_base!(name, config)
    CsvSchema.new(
      required_headers: config.fetch('required_headers'),
      amount_key: config.fetch('amount_key'),
      direction: build_direction(name, config),
      mapping: build_mapping(config)
    )
  end

  def self.validate_base!(name, config)
    raise ArgumentError, "Schema '#{name}' must be a mapping of settings" unless config.is_a?(Hash)

    # A key present with a blank (nil) value is as unusable as an absent one.
    missing = BASE_REQUIRED_KEYS.select { |key| config[key].nil? }
    return if missing.empty?

    raise ArgumentError, "Schema '#{name}' missing required keys: #{missing.join(', ')}"
  end
  private_class_method :validate_base!

  def self.build_direction(name, config)
    strategy = config.fetch('direction')
    return DIRECTION_STRATEGIES[strategy] if DIRECTION_STRATEGIES.key?(strategy)
    return build_indicator_direction(name, config) if strategy == 'indicator_column'

    raise ArgumentError,
          "Schema '#{name}' has unknown direction: #{strategy.inspect}. " \
          'Valid: positive_debit, negative_debit, indicator_column'
  end
  private_class_method :build_direction

  # Returns a lambda classifying a row as :debit when its indicator cell equals
  # the configured debit label (default "Debit"); anything else is :credit.
  # Both sides are stringified and stripped so stray whitespace or a
  # non-string YAML value does not silently turn every row into a credit.
  def self.build_indicator_direction(name, config)
    col = config['indicator_column']
    if col.nil?
      raise ArgumentError, "Schema '#{name}': indicator_column direction requires indicator_column key"
    end

    configured = config['debit_value']
    debit_val = (configured.nil? ? 'Debit' : configured).to_s.strip
    ->(row, _amount) { row[col].to_s.strip == debit_val ? :debit : :credit }
  end
  private_class_method :build_indicator_direction

  # Returns a lambda that closes over the column/format config so CsvSchema
  # can call it without knowing the schema structure.
  def self.build_mapping(config)
    date_col = config.fetch('date_column')
    date_fmt = config.fetch('date_format')
    payee_col = config.fetch('payee_column')
    ->(row, signed_amount) { map_row(row, signed_amount, date_col, date_fmt, payee_col) }
  end
  private_class_method :build_mapping

  # Converts one CSV row into a Transaction. The date cell is stringified and
  # stripped before parsing, matching how the payee cell is treated; a blank
  # or malformed date raises Date.strptime's invalid-date error.
  def self.map_row(row, signed_amount, date_col, date_fmt, payee_col)
    Transaction.new(
      date: Date.strptime(row[date_col].to_s.strip, date_fmt),
      payee: row.fetch(payee_col).to_s.strip,
      amount: signed_amount,
      raw: row
    )
  end
  private_class_method :map_row
end
|
|
82
|
+
end
|
data/lib/find_subscriptions.rb
CHANGED
|
@@ -1,4 +1,29 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
#
|
|
3
|
+
# FindSubscriptions detects recurring charges in bank and credit card CSV exports.
|
|
4
|
+
#
|
|
5
|
+
# The main entry point for consumers of the library is {FindSubscriptions::CLI}.
|
|
6
|
+
# CSV parsing is handled by {FindSubscriptions::CsvSchema} and {FindSubscriptions::SchemaRegistry}.
|
|
7
|
+
# Repeat-charge detection lives in {FindSubscriptions::Detectors::RepeatCharges}.
|
|
8
|
+
#
|
|
9
|
+
# @see FindSubscriptions::CLI
|
|
10
|
+
# @see FindSubscriptions::ConfigLoader
|
|
11
|
+
# @see FindSubscriptions::UserSchemaBuilder
|
|
12
|
+
module FindSubscriptions
|
|
13
|
+
end
|
|
14
|
+
|
|
4
15
|
require_relative 'find_subscriptions/transaction'
|
|
16
|
+
require_relative 'find_subscriptions/schema_registry'
|
|
17
|
+
require_relative 'find_subscriptions/payee_normalizer'
|
|
18
|
+
require_relative 'find_subscriptions/help_text'
|
|
19
|
+
require_relative 'find_subscriptions/config_loader'
|
|
20
|
+
require_relative 'find_subscriptions/user_schema_builder'
|
|
21
|
+
require_relative 'schemas/generic'
|
|
22
|
+
require_relative 'schemas/american_express'
|
|
23
|
+
require_relative 'schemas/navy_federal'
|
|
24
|
+
require_relative 'detectors/known_payees'
|
|
25
|
+
require_relative 'detectors/repeat_charges'
|
|
26
|
+
require_relative 'output/stdout_reporter'
|
|
27
|
+
require_relative 'output/json_reporter'
|
|
28
|
+
require_relative 'output/csv_reporter'
|
|
29
|
+
require_relative 'find_subscriptions/cli'
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: find-subscriptions
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1
|
|
4
|
+
version: 0.2.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Jeffrey Baird
|
|
@@ -70,7 +70,7 @@ description: |
|
|
|
70
70
|
A CLI tool that analyzes CSV files from banks and credit cards to detect
|
|
71
71
|
subscription charges based on known payees and recurring transaction patterns.
|
|
72
72
|
email:
|
|
73
|
-
-
|
|
73
|
+
- jeff@jeffreyleebaird.com
|
|
74
74
|
executables:
|
|
75
75
|
- find-subscriptions
|
|
76
76
|
extensions: []
|
|
@@ -83,9 +83,12 @@ files:
|
|
|
83
83
|
- lib/detectors/repeat_charges.rb
|
|
84
84
|
- lib/find_subscriptions.rb
|
|
85
85
|
- lib/find_subscriptions/cli.rb
|
|
86
|
+
- lib/find_subscriptions/config_loader.rb
|
|
87
|
+
- lib/find_subscriptions/help_text.rb
|
|
86
88
|
- lib/find_subscriptions/payee_normalizer.rb
|
|
87
89
|
- lib/find_subscriptions/schema_registry.rb
|
|
88
90
|
- lib/find_subscriptions/transaction.rb
|
|
91
|
+
- lib/find_subscriptions/user_schema_builder.rb
|
|
89
92
|
- lib/output/csv_reporter.rb
|
|
90
93
|
- lib/output/json_reporter.rb
|
|
91
94
|
- lib/output/stdout_reporter.rb
|