insider-scanner 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. insider_scanner-1.0.0/LICENSE +21 -0
  2. insider_scanner-1.0.0/PKG-INFO +438 -0
  3. insider_scanner-1.0.0/README.md +391 -0
  4. insider_scanner-1.0.0/pyproject.toml +81 -0
  5. insider_scanner-1.0.0/setup.cfg +4 -0
  6. insider_scanner-1.0.0/src/insider_scanner/__init__.py +3 -0
  7. insider_scanner-1.0.0/src/insider_scanner/cli.py +407 -0
  8. insider_scanner-1.0.0/src/insider_scanner/core/__init__.py +1 -0
  9. insider_scanner-1.0.0/src/insider_scanner/core/afm.py +253 -0
  10. insider_scanner-1.0.0/src/insider_scanner/core/amf.py +522 -0
  11. insider_scanner-1.0.0/src/insider_scanner/core/bafin.py +462 -0
  12. insider_scanner-1.0.0/src/insider_scanner/core/congress_house.py +639 -0
  13. insider_scanner-1.0.0/src/insider_scanner/core/congress_senate.py +685 -0
  14. insider_scanner-1.0.0/src/insider_scanner/core/edgar.py +201 -0
  15. insider_scanner-1.0.0/src/insider_scanner/core/eu_merger.py +230 -0
  16. insider_scanner-1.0.0/src/insider_scanner/core/eu_models.py +197 -0
  17. insider_scanner-1.0.0/src/insider_scanner/core/eu_scan.py +161 -0
  18. insider_scanner-1.0.0/src/insider_scanner/core/merger.py +218 -0
  19. insider_scanner-1.0.0/src/insider_scanner/core/models.py +191 -0
  20. insider_scanner-1.0.0/src/insider_scanner/core/openinsider.py +214 -0
  21. insider_scanner-1.0.0/src/insider_scanner/core/prices/__init__.py +23 -0
  22. insider_scanner-1.0.0/src/insider_scanner/core/prices/model.py +77 -0
  23. insider_scanner-1.0.0/src/insider_scanner/core/prices/registry.py +49 -0
  24. insider_scanner-1.0.0/src/insider_scanner/core/prices/repository.py +144 -0
  25. insider_scanner-1.0.0/src/insider_scanner/core/prices/service.py +88 -0
  26. insider_scanner-1.0.0/src/insider_scanner/core/prices/source.py +53 -0
  27. insider_scanner-1.0.0/src/insider_scanner/core/prices/tiingo.py +143 -0
  28. insider_scanner-1.0.0/src/insider_scanner/core/prices/yahoo.py +145 -0
  29. insider_scanner-1.0.0/src/insider_scanner/core/rns_investegate.py +487 -0
  30. insider_scanner-1.0.0/src/insider_scanner/core/secform4.py +309 -0
  31. insider_scanner-1.0.0/src/insider_scanner/core/senate.py +129 -0
  32. insider_scanner-1.0.0/src/insider_scanner/gui/__init__.py +1 -0
  33. insider_scanner-1.0.0/src/insider_scanner/gui/analysis_tab.py +168 -0
  34. insider_scanner-1.0.0/src/insider_scanner/gui/congress_tab.py +624 -0
  35. insider_scanner-1.0.0/src/insider_scanner/gui/european_tab.py +541 -0
  36. insider_scanner-1.0.0/src/insider_scanner/gui/main_window.py +177 -0
  37. insider_scanner-1.0.0/src/insider_scanner/gui/price_chart.py +243 -0
  38. insider_scanner-1.0.0/src/insider_scanner/gui/scan_tab.py +585 -0
  39. insider_scanner-1.0.0/src/insider_scanner/gui/theme/__init__.py +88 -0
  40. insider_scanner-1.0.0/src/insider_scanner/gui/theme/contrast.py +56 -0
  41. insider_scanner-1.0.0/src/insider_scanner/gui/theme/fonts.py +54 -0
  42. insider_scanner-1.0.0/src/insider_scanner/gui/theme/manager.py +225 -0
  43. insider_scanner-1.0.0/src/insider_scanner/gui/theme/stylesheet.py +461 -0
  44. insider_scanner-1.0.0/src/insider_scanner/gui/theme/table_style.py +192 -0
  45. insider_scanner-1.0.0/src/insider_scanner/gui/theme/tokens.py +208 -0
  46. insider_scanner-1.0.0/src/insider_scanner/gui/widgets.py +360 -0
  47. insider_scanner-1.0.0/src/insider_scanner/main.py +139 -0
  48. insider_scanner-1.0.0/src/insider_scanner/persistence/__init__.py +13 -0
  49. insider_scanner-1.0.0/src/insider_scanner/persistence/bootstrap.py +123 -0
  50. insider_scanner-1.0.0/src/insider_scanner/persistence/coverage.py +142 -0
  51. insider_scanner-1.0.0/src/insider_scanner/persistence/engine.py +74 -0
  52. insider_scanner-1.0.0/src/insider_scanner/persistence/errors.py +5 -0
  53. insider_scanner-1.0.0/src/insider_scanner/persistence/mappings.py +215 -0
  54. insider_scanner-1.0.0/src/insider_scanner/persistence/migrations.py +43 -0
  55. insider_scanner-1.0.0/src/insider_scanner/persistence/refresh.py +95 -0
  56. insider_scanner-1.0.0/src/insider_scanner/persistence/repositories.py +405 -0
  57. insider_scanner-1.0.0/src/insider_scanner/persistence/schema.py +344 -0
  58. insider_scanner-1.0.0/src/insider_scanner/persistence/types.py +36 -0
  59. insider_scanner-1.0.0/src/insider_scanner/resources/__init__.py +1 -0
  60. insider_scanner-1.0.0/src/insider_scanner/resources/fonts/Inter-Medium.ttf +0 -0
  61. insider_scanner-1.0.0/src/insider_scanner/resources/fonts/Inter-Regular.ttf +0 -0
  62. insider_scanner-1.0.0/src/insider_scanner/resources/fonts/Inter-SemiBold.ttf +0 -0
  63. insider_scanner-1.0.0/src/insider_scanner/resources/fonts/JetBrainsMono-Medium.ttf +0 -0
  64. insider_scanner-1.0.0/src/insider_scanner/resources/fonts/JetBrainsMono-Regular.ttf +0 -0
  65. insider_scanner-1.0.0/src/insider_scanner/resources/fonts/OFL-Inter.txt +92 -0
  66. insider_scanner-1.0.0/src/insider_scanner/resources/fonts/OFL-JetBrainsMono.txt +93 -0
  67. insider_scanner-1.0.0/src/insider_scanner/resources/fonts/__init__.py +7 -0
  68. insider_scanner-1.0.0/src/insider_scanner/resources/seeds/__init__.py +1 -0
  69. insider_scanner-1.0.0/src/insider_scanner/resources/seeds/congress_members.json +10165 -0
  70. insider_scanner-1.0.0/src/insider_scanner/resources/seeds/eu_watchlist.txt +5 -0
  71. insider_scanner-1.0.0/src/insider_scanner/resources/seeds/tickers_watchlist.txt +113 -0
  72. insider_scanner-1.0.0/src/insider_scanner/services/__init__.py +22 -0
  73. insider_scanner-1.0.0/src/insider_scanner/services/adapters.py +113 -0
  74. insider_scanner-1.0.0/src/insider_scanner/services/application.py +34 -0
  75. insider_scanner-1.0.0/src/insider_scanner/services/common.py +95 -0
  76. insider_scanner-1.0.0/src/insider_scanner/services/congress.py +108 -0
  77. insider_scanner-1.0.0/src/insider_scanner/services/context.py +58 -0
  78. insider_scanner-1.0.0/src/insider_scanner/services/european.py +348 -0
  79. insider_scanner-1.0.0/src/insider_scanner/services/importer.py +377 -0
  80. insider_scanner-1.0.0/src/insider_scanner/services/us.py +243 -0
  81. insider_scanner-1.0.0/src/insider_scanner/utils/__init__.py +1 -0
  82. insider_scanner-1.0.0/src/insider_scanner/utils/caching.py +60 -0
  83. insider_scanner-1.0.0/src/insider_scanner/utils/config.py +195 -0
  84. insider_scanner-1.0.0/src/insider_scanner/utils/http.py +94 -0
  85. insider_scanner-1.0.0/src/insider_scanner/utils/logging.py +19 -0
  86. insider_scanner-1.0.0/src/insider_scanner/utils/parsing.py +86 -0
  87. insider_scanner-1.0.0/src/insider_scanner/utils/threading.py +82 -0
  88. insider_scanner-1.0.0/src/insider_scanner.egg-info/PKG-INFO +438 -0
  89. insider_scanner-1.0.0/src/insider_scanner.egg-info/SOURCES.txt +152 -0
  90. insider_scanner-1.0.0/src/insider_scanner.egg-info/dependency_links.txt +1 -0
  91. insider_scanner-1.0.0/src/insider_scanner.egg-info/entry_points.txt +3 -0
  92. insider_scanner-1.0.0/src/insider_scanner.egg-info/requires.txt +25 -0
  93. insider_scanner-1.0.0/src/insider_scanner.egg-info/top_level.txt +1 -0
  94. insider_scanner-1.0.0/tests/test_analysis_tab.py +278 -0
  95. insider_scanner-1.0.0/tests/test_caching.py +68 -0
  96. insider_scanner-1.0.0/tests/test_cli.py +201 -0
  97. insider_scanner-1.0.0/tests/test_cli_persistence_integration.py +371 -0
  98. insider_scanner-1.0.0/tests/test_cli_price.py +23 -0
  99. insider_scanner-1.0.0/tests/test_config.py +203 -0
  100. insider_scanner-1.0.0/tests/test_congress_house.py +809 -0
  101. insider_scanner-1.0.0/tests/test_congress_senate.py +679 -0
  102. insider_scanner-1.0.0/tests/test_congress_tab.py +327 -0
  103. insider_scanner-1.0.0/tests/test_coverage.py +153 -0
  104. insider_scanner-1.0.0/tests/test_edgar.py +155 -0
  105. insider_scanner-1.0.0/tests/test_eu_merger.py +91 -0
  106. insider_scanner-1.0.0/tests/test_eu_models.py +48 -0
  107. insider_scanner-1.0.0/tests/test_eu_sources.py +291 -0
  108. insider_scanner-1.0.0/tests/test_european.py +208 -0
  109. insider_scanner-1.0.0/tests/test_gui.py +370 -0
  110. insider_scanner-1.0.0/tests/test_gui_service_integration.py +448 -0
  111. insider_scanner-1.0.0/tests/test_integration.py +459 -0
  112. insider_scanner-1.0.0/tests/test_legacy_importer.py +339 -0
  113. insider_scanner-1.0.0/tests/test_live.py +73 -0
  114. insider_scanner-1.0.0/tests/test_live_prices.py +47 -0
  115. insider_scanner-1.0.0/tests/test_main_entrypoint.py +211 -0
  116. insider_scanner-1.0.0/tests/test_main_window_theme.py +68 -0
  117. insider_scanner-1.0.0/tests/test_merger.py +196 -0
  118. insider_scanner-1.0.0/tests/test_models.py +166 -0
  119. insider_scanner-1.0.0/tests/test_no_site_packages_writes.py +120 -0
  120. insider_scanner-1.0.0/tests/test_openinsider.py +134 -0
  121. insider_scanner-1.0.0/tests/test_package_data.py +24 -0
  122. insider_scanner-1.0.0/tests/test_packaging.py +52 -0
  123. insider_scanner-1.0.0/tests/test_packaging_metadata.py +81 -0
  124. insider_scanner-1.0.0/tests/test_parsing.py +85 -0
  125. insider_scanner-1.0.0/tests/test_persistence.py +433 -0
  126. insider_scanner-1.0.0/tests/test_persistence_mappings.py +92 -0
  127. insider_scanner-1.0.0/tests/test_price_chart.py +195 -0
  128. insider_scanner-1.0.0/tests/test_price_chart_theming.py +59 -0
  129. insider_scanner-1.0.0/tests/test_price_model.py +87 -0
  130. insider_scanner-1.0.0/tests/test_price_registry.py +98 -0
  131. insider_scanner-1.0.0/tests/test_price_repository.py +60 -0
  132. insider_scanner-1.0.0/tests/test_price_service.py +58 -0
  133. insider_scanner-1.0.0/tests/test_price_source.py +70 -0
  134. insider_scanner-1.0.0/tests/test_readme_commands.py +46 -0
  135. insider_scanner-1.0.0/tests/test_refresh_state.py +113 -0
  136. insider_scanner-1.0.0/tests/test_release_workflow.py +71 -0
  137. insider_scanner-1.0.0/tests/test_repositories.py +556 -0
  138. insider_scanner-1.0.0/tests/test_scan_adapters.py +197 -0
  139. insider_scanner-1.0.0/tests/test_scan_service_failures.py +141 -0
  140. insider_scanner-1.0.0/tests/test_scan_services.py +1203 -0
  141. insider_scanner-1.0.0/tests/test_secform4.py +190 -0
  142. insider_scanner-1.0.0/tests/test_senate.py +117 -0
  143. insider_scanner-1.0.0/tests/test_theme_contrast.py +236 -0
  144. insider_scanner-1.0.0/tests/test_theme_fonts.py +129 -0
  145. insider_scanner-1.0.0/tests/test_theme_manager.py +311 -0
  146. insider_scanner-1.0.0/tests/test_theme_stylesheet.py +155 -0
  147. insider_scanner-1.0.0/tests/test_theme_tokens.py +211 -0
  148. insider_scanner-1.0.0/tests/test_threading.py +30 -0
  149. insider_scanner-1.0.0/tests/test_tiingo_prices.py +463 -0
  150. insider_scanner-1.0.0/tests/test_update_congress.py +457 -0
  151. insider_scanner-1.0.0/tests/test_verify_install_script.py +195 -0
  152. insider_scanner-1.0.0/tests/test_widgets_model_theming.py +346 -0
  153. insider_scanner-1.0.0/tests/test_widgets_theming.py +190 -0
  154. insider_scanner-1.0.0/tests/test_yahoo_prices.py +460 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Łukasz Czarnacki
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,438 @@
1
+ Metadata-Version: 2.4
2
+ Name: insider-scanner
3
+ Version: 1.0.0
4
+ Summary: Scan insider trades (USA, US Congress, EU) with a local SQLite store and price-timeline overlay; CLI + desktop GUI.
5
+ Author: Łukasz Rafał Czarnacki
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/Czarnak/insider-scanner
8
+ Project-URL: Repository, https://github.com/Czarnak/insider-scanner
9
+ Project-URL: Issues, https://github.com/Czarnak/insider-scanner/issues
10
+ Keywords: insider-trading,sec,edgar,congress,finance,stocks,pyside6
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: X11 Applications :: Qt
13
+ Classifier: Intended Audience :: Financial and Insurance Industry
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Programming Language :: Python :: 3.14
20
+ Classifier: Topic :: Office/Business :: Financial :: Investment
21
+ Requires-Python: >=3.11
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: curl-cffi<1,>=0.14
25
+ Requires-Dist: requests<3,>=2.31
26
+ Requires-Dist: beautifulsoup4<5,>=4.12
27
+ Requires-Dist: lxml<7,>=4.9
28
+ Requires-Dist: pandas<4,>=3.0.0
29
+ Requires-Dist: pyyaml<7,>=6.0
30
+ Requires-Dist: pdfplumber~=0.11
31
+ Requires-Dist: numpy<3,>=2.4.1
32
+ Requires-Dist: platformdirs<5,>=4.0
33
+ Requires-Dist: SQLAlchemy<3.0,>=2.0
34
+ Provides-Extra: gui
35
+ Requires-Dist: pyqtgraph<0.14,>=0.13; extra == "gui"
36
+ Requires-Dist: PySide6<7,>=6.6; extra == "gui"
37
+ Provides-Extra: dev
38
+ Requires-Dist: pytest>=7.4; extra == "dev"
39
+ Requires-Dist: pytest-cov>=4.1; extra == "dev"
40
+ Requires-Dist: pytest-qt>=4.2; extra == "dev"
41
+ Requires-Dist: responses>=0.24; extra == "dev"
42
+ Requires-Dist: ruff>=0.15.0; extra == "dev"
43
+ Provides-Extra: release
44
+ Requires-Dist: build>=1.2; extra == "release"
45
+ Requires-Dist: twine>=5.1; extra == "release"
46
+ Dynamic: license-file
47
+
48
+ # Insider Scanner
49
+
50
+ [![PyPI](https://img.shields.io/pypi/v/insider-scanner.svg)](https://pypi.org/project/insider-scanner/)
51
+ [![CI](https://github.com/Czarnak/insider-scanner/actions/workflows/ci.yml/badge.svg)](https://github.com/Czarnak/insider-scanner/actions/workflows/ci.yml)
52
+ [![Python](https://img.shields.io/pypi/pyversions/insider-scanner.svg)](https://pypi.org/project/insider-scanner/)
53
+
54
+ Scan insider trades from **secform4.com**, **openinsider.com**, **SEC EDGAR**, and European regulators (FCA, BaFin, AMF, AFM). Includes congressional financial disclosure scanning (House and Senate), multi-source deduplication, committee-based sector filtering, and a desktop GUI with EDGAR filing links plus a European scan workspace.
55
+
56
+ ## Install
57
+
58
+ Install the base package for the command-line interface:
59
+
60
+ ```bash
61
+ pip install insider-scanner
62
+ insider-scanner-cli --help
63
+ ```
64
+
65
+ Install the optional GUI dependencies for the desktop application:
66
+
67
+ ```bash
68
+ pip install "insider-scanner[gui]"
69
+ insider-scanner
70
+ ```
71
+
72
+ ## Run
73
+
74
+ ```bash
75
+ insider-scanner-cli --help # command-line interface
76
+ insider-scanner # desktop GUI (requires the gui extra)
77
+ ```
78
+
79
+ ## Development Setup
80
+
81
+ ```bash
82
+ git clone https://github.com/Czarnak/insider-scanner.git
83
+ cd insider-scanner
84
+ pip install -e ".[gui,dev]"
85
+ ```
86
+
87
+ ### Requirements
88
+
89
+ Python 3.11+. Core dependencies include `curl-cffi`, `requests`, `beautifulsoup4`, `lxml`,
90
+ `pandas`, `pyyaml`, `pdfplumber`, `numpy`, `platformdirs`, and `SQLAlchemy`.
91
+ The `gui` extra adds `PySide6` and `pyqtgraph`.
92
+
93
+ ---
94
+
95
+ ## Usage
96
+
97
+ ### GUI
98
+
99
+ ```bash
100
+ insider-scanner
101
+ # or
102
+ python -m insider_scanner.main
103
+ ```
104
+
105
+ The GUI tabs cover the core use cases:
106
+
107
+ #### Insider Scan tab
108
+
109
+ - Search a ticker and run both secform4.com and openinsider.com scrapers in one click.
110
+ - Fetch the latest trades (configurable count) and run watchlist scans backed by the user data directory's `tickers_watchlist.txt`.
111
+ - Toggle sources, specify a date range, trade type, minimum value, or Congress-only filter.
112
+ - View sortable tables that display filing/trade dates, highlight congressional filings, show EDGAR links, and let you export CSV/JSON.
113
+ - Cancel long-running scans and resolve any ticker to its SEC CIK + filing page.
114
+
115
+ ![Insider Scan tab](img/insiderTab.png)
116
+
117
+ #### Congress Scan tab
118
+
119
+ - Pick a legislator (House/Senate dropdown) or the whole committee list, select sources (House/Senate), and preview results while the scan runs in a threaded worker with a progress bar and a cancel button.
120
+ - Use filters such as trade type, sector, and minimum value, then double-click any row to open the original PDF/PTR.
121
+ - Save filtered results to CSV/JSON, with exports reflecting the current filters.
122
+
123
+ ![Congress Scan tab](img/congressTab.png)
124
+
125
+ #### European Insiders tab
126
+
127
+ - Choose All/UK/DE/FR/NL, type an ISIN, or scan the user data directory's `eu_watchlist.txt`.
128
+ - Enable optional date bounds, filter by trade type and minimum value, and watch the progress bar while each ISIN is processed.
129
+ - Results are sortable, show normalized positions/currency, provide detail text on double-click, and allow opening the regulator source URL.
130
+ - Save filtered results to CSV/JSON (filename reflects the ISIN + country) or clear filters to adjust the view.
131
+
132
+ ![European Insiders tab](img/europeanTab.png)
133
+
134
+ #### Analysis tab
135
+
136
+ - Select a US ticker from the dropdown list and click "Load Chart".
137
+ - Visualizes the past 2 years of daily price history using an interactive line graph with crosshairs.
138
+ - Overlays insider trade markers directly on the price line: corporate trades and congressional disclosures are both shown.
139
+ - Hover over trade markers to view detailed tooltips including insider name, role, trade size, and total value.
140
+ - The chart supports standard panning and zooming via pyqtgraph.
141
+
142
+ ![Analysis tab](img/analysisTab.png)
143
+
144
+ #### Appearance & themes
145
+
146
+ - A single semantic design-token system drives both **light** and **dark** themes (WCAG AA contrast).
147
+ - Switch via **View ▸ Theme** (System / Light / Dark); the choice is remembered between sessions, and **System** follows the OS color scheme.
148
+ - Transaction colors are semantic and always paired with text/sign (green = purchase, red = sale, amber = flagged/congress); tickers, dates, and monetary values use a monospace face for tabular alignment.
149
+ - The UI bundles the open-source **Inter** (sans) and **JetBrains Mono** (monospace) fonts, falling back to system fonts (Segoe UI / Consolas) when unavailable.
150
+
151
+ ### CLI
152
+
153
+ ```bash
154
+ # Scan a specific ticker
155
+ insider-scanner-cli scan AAPL
156
+ insider-scanner-cli scan AAPL --type Buy --min-value 1000000 --save
157
+
158
+ # Scan with date range
159
+ insider-scanner-cli scan AAPL --since 2025-01-01 --until 2025-06-30
160
+
161
+ # Fetch latest insider trades
162
+ insider-scanner-cli latest --count 50 --save
163
+ insider-scanner-cli latest --since 2025-06-01 --until 2025-06-30
164
+
165
+ # Resolve SEC CIK
166
+ insider-scanner-cli resolve-cik AAPL
167
+
168
+ # Initialize default Congress member list
169
+ insider-scanner-cli init-congress
170
+
171
+ # Congress-only filter
172
+ insider-scanner-cli scan AAPL --congress-only
173
+
174
+ # Import legacy JSON exports into SQLite
175
+ insider-scanner-cli import-legacy ./old-exports
176
+ insider-scanner-cli import-legacy ./large.json --max-file-size-mib 100
177
+ ```
178
+
179
+ ### European scan CLI
180
+
181
+ ```bash
182
+ # Scan a single ISIN or run the configured watchlist (eu_watchlist.txt)
183
+ insider-scanner-cli eu-scan GB0002875804
184
+ insider-scanner-cli eu-scan --watchlist --country UK --min-value 50000 --save
185
+ ```
186
+
187
+ Pass `--country` to restrict to `UK`/`DE`/`FR`/`NL` (default: `All`), `--type` to filter `Buy`/`Sell` trades, `--min-value` to set a minimum total reported value, and `--since`/`--until` for date bounds. Use `--watchlist` to scan every configured ISIN, and `--save` to persist the filtered CSV/JSON bundle.
188
+
189
+ ### Local persistence and paths
190
+
191
+ Parsed US, Congress, and European trades are stored in a local SQLite database.
192
+ Writable data, cache, watchlist, House disclosure, and export paths are resolved
193
+ with `platformdirs`; the application does not write runtime data into the source
194
+ checkout. Print the paths selected for the current OS and user with:
195
+
196
+ ```bash
197
+ python -c "from insider_scanner.utils.config import DEFAULT_PATHS; print(DEFAULT_PATHS)"
198
+ ```
199
+
200
+ The SQLite database stores normalized trade records, successful source/date
201
+ coverage, and latest-refresh state. HTTP caches remain transient files under the
202
+ platform cache directory and can be removed without deleting parsed trades.
203
+
204
+ For date-bounded scans, the service queries SQLite first, calculates uncovered
205
+ intervals independently for each source, fetches only those gaps, and then
206
+ returns the merged local result. Failed or cancelled intervals are not marked as
207
+ covered. AMF search bounds are publication dates rather than transaction dates,
208
+ so bounded French scans conservatively persist and filter returned trades but do
209
+ not mark the requested trade-date interval as covered. Repeating an AMF bounded
210
+ scan therefore rechecks the regulator. RNS, BaFin, and AFM bounded scans retain
211
+ normal coverage caching. Latest scans reuse a successful refresh for one hour by default;
212
+ different requested counts have independent refresh state. European latest mode
213
+ uses only sources with a latest endpoint. AFM remains available for bounded
214
+ Netherlands scans but is excluded from latest refreshes.
215
+
216
+ `import-legacy PATH` accepts a JSON file or recursively scans a directory for
217
+ JSON exports. Imports are validated, idempotent, do not modify source files, and
218
+ do not alter scan coverage or refresh state. Each file is limited to 50 MiB by
219
+ default; use `--max-file-size-mib` to set a different positive limit. Any
220
+ malformed, oversized, or invalid record produces a nonzero exit code.
221
+
222
+ Database initialization and migration failures stop GUI or CLI startup rather
223
+ than continuing with partial state. User-facing errors remain concise; detailed
224
+ diagnostics are sent through application logging.
225
+
226
+ #### Database migration recovery
227
+
228
+ The database is `insider_scanner.sqlite3` under the platform user data directory
229
+ reported by `DEFAULT_PATHS.database_file`. Typical locations are
230
+ `%LOCALAPPDATA%\Insider Scanner\Insider Scanner\` on Windows,
231
+ `~/Library/Application Support/Insider Scanner/` on macOS, and
232
+ `~/.local/share/Insider Scanner/` on Linux.
233
+
234
+ If startup reports a database or migration failure:
235
+
236
+ 1. Close every Insider Scanner GUI and CLI process.
237
+ 2. Back up `insider_scanner.sqlite3` before making any recovery change.
238
+ 3. Do not edit `schema_version`, tables, indexes, or other schema objects manually.
239
+ 4. Restore a known-good database backup, or rename the corrupt database and
240
+ restart the application to build a fresh database.
241
+ 5. Optionally repopulate the fresh database with
242
+ `insider-scanner-cli import-legacy PATH` using validated legacy JSON exports.
243
+
244
+ ---
245
+
246
+ ## Architecture
247
+
248
+ ```
249
+ src/insider_scanner/
250
+ ├── core/
251
+ │ ├── models.py # InsiderTrade + CongressTrade dataclasses
252
+ │ ├── secform4.py # secform4.com scraper (compound-column parser, direct filing links)
253
+ │ ├── openinsider.py # openinsider.com HTML parser + scraper
254
+ │ ├── edgar.py # SEC EDGAR CIK resolver (JSON primary + HTML fallback) + filing URLs
255
+ │ ├── senate.py # Congress member list + trade flagging
256
+ │ ├── congress_house.py # House financial disclosures (ZIP index + PTR PDF parsing)
257
+ │ ├── congress_senate.py # Senate EFD scraper (session + search + PTR page parsing)
258
+ │ ├── merger.py # Multi-source dedup, filtering, export
259
+ │ ├── afm.py # Dutch AFM API client
260
+ │ ├── amf.py # French AMF BDIF API client
261
+ │ ├── bafin.py # German BaFin download+CSV parser
262
+ │ ├── eu_models.py # European trade dataclass + helpers
263
+ │ ├── eu_merger.py # European dedup/filter/export helpers
264
+ │ ├── eu_scan.py # Dispatcher that runs the selected European scrapers
265
+ │ └── rns_investegate.py # UK RNS announcements via Investegate
266
+ ├── gui/
267
+ │ ├── main_window.py # Main window (default OS style + tab management)
268
+ │ ├── scan_tab.py # Insider scan workflow: search, filters, table, EDGAR links
269
+ │ ├── congress_tab.py # Congress tab with sector filtering and save/export helpers
270
+ │ ├── european_tab.py # European tab with ISIN/watchlist scans and detail panel
271
+ │ ├── analysis_tab.py # Analysis tab integrating price history and trade markers
272
+ │ ├── price_chart.py # PyQtGraph price chart widget with crosshairs and scatter overlays
273
+ │ └── widgets.py # Pandas table model, sortable proxy, table helpers
274
+ ├── persistence/
275
+ │ ├── schema.py # SQLAlchemy Core table definitions
276
+ │ ├── bootstrap.py # Versioned SQLite initialization and migrations
277
+ │ ├── repositories.py # Immutable trade upserts and queries
278
+ │ ├── coverage.py # Source-aware covered intervals and gap calculation
279
+ │ └── refresh.py # Latest-scan freshness state
280
+ ├── resources/
281
+ │ └── seeds/ # Packaged default watchlists and Congress members
282
+ ├── services/
283
+ │ ├── application.py # Shared GUI/CLI service composition
284
+ │ ├── us.py # DB-first US scan orchestration
285
+ │ ├── congress.py # DB-first Congress scan orchestration
286
+ │ ├── european.py # DB-first European scan orchestration
287
+ │ └── importer.py # Validated legacy JSON import
288
+ ├── utils/
289
+ │ ├── config.py # Paths, SEC/User-Agent constants, watchlists
290
+ │ ├── logging.py # Logging setup
291
+ │ ├── caching.py # File cache with TTL expiry
292
+ │ ├── http.py # Rate-limited HTTP helper
293
+ │ └── threading.py # Worker/Signal helpers for GUI
294
+ ├── main.py # GUI entry point
295
+ └── cli.py # CLI entry point (scan, latest, EU, import)
296
+
297
+ scripts/
298
+ └── update_congress.py # Fetch current federal + state legislators
299
+ ```
300
+
301
+ ### Data Flow — Insider Trades
302
+
303
+ 1. **Resolve**: `edgar.py` resolves ticker → CIK via SEC `company_tickers.json` (cached 24h, HTML fallback)
304
+ 2. **Scrape**: `secform4.py` fetches CIK-based pages with compound-column parsing (date+type, name+title split by `<br>`); `openinsider.py` fetches ticker-based pages; both produce `InsiderTrade` records
305
+ 3. **Persist**: parsed records are upserted into SQLite; successful source/date intervals are recorded separately
306
+ 4. **Cache**: raw HTTP responses are cached independently with configurable TTL (default 1h)
307
+ 5. **Merge**: `merger.py` deduplicates trades across sources (matching by ticker + name + date + share count)
308
+ 6. **Flag**: `senate.py` checks insider names against the Congress member list (fuzzy matching)
309
+ 7. **Verify**: secform4 trades include direct SEC filing links; others get generated EDGAR search URLs
310
+ 8. **Export**: results are saved as CSV + JSON under the platform user data directory
311
+
312
+ ### Data Flow — Congress (House)
313
+
314
+ 1. **Index**: `congress_house.py` downloads yearly ZIP archives from `disclosures-clerk.house.gov` containing XML indexes of all financial disclosure filings. Past years are cached permanently; current year can be refreshed on demand.
315
+ 2. **Search**: XML index is parsed to find PTR (Periodic Transaction Report) filings matching the selected official and date range. Multi-year ranges download multiple indexes as needed.
316
+ 3. **Fetch**: Individual PTR PDFs are downloaded and cached under `house_disclosures/{year}/pdfs/` in the platform user data directory.
317
+ 4. **Parse**: `pdfplumber` extracts transaction tables from electronically-filed PDFs. Scanned/handwritten PDFs are detected and skipped.
318
+ 5. **Convert**: Raw table rows are converted to `CongressTrade` records with parsed tickers (from asset descriptions), normalized amount ranges, owner codes, and transaction types.
319
+
320
+ ### Data Flow — Congress (Senate)
321
+
322
+ 1. **Session**: `congress_senate.py` establishes an authenticated session with `efdsearch.senate.gov` by accepting the prohibition agreement and obtaining a CSRF token.
323
+ 2. **Search**: POST to the EFD JSON API with senator name, report type (PTR), and date range. Results include links to individual PTR pages. Paper filings (scanned PDFs) are automatically filtered out.
324
+ 3. **Parse**: Each electronic PTR page contains an HTML table with columns for transaction date, owner, ticker, asset name, type, amount range, and comment. These are parsed via BeautifulSoup.
325
+ 4. **Convert**: Transactions are converted to `CongressTrade` records. Tickers are read directly from the "Ticker" column when available; when the ticker is "--", it's extracted from the asset description (e.g. "Vanguard ETF (BND)" → BND).
326
+
327
+ ### Data Flow — European Insider Trades (UK / DE / FR / NL)
328
+
329
+ 1. **Search**: `rns_investegate` queries Investegate for UK Director/PDMR announcements; `bafin`, `amf`, and `afm` send POST/GET requests to the regulators' portals for Germany, France, and the Netherlands respectively, honoring optional date bounds and ISIN filters.
330
+ 2. **Parse**: Each scraper normalizes the (possibly localized) position text, currency formatting, and trade/filing dates before emitting `EuropeanInsiderTrade` records.
331
+ 3. **Merge**: `eu_scan.scrape_eu_trades_for_isin` runs the requested sources, `eu_merger.merge_eu_trades` deduplicates across regulators, and `eu_merger.filter_eu_trades` applies the GUI/CLI filters (country, trade type, min value, date range).
332
+ 4. **Save**: The European tab (and `eu-scan`) use `eu_merger.save_eu_results` to output labeled CSV/JSON bundles after the filters are applied.
333
+
334
+ ### Price History & Analysis Tab
335
+
336
+ The application integrates daily price history from Yahoo Finance via an unofficial API using session handling, configurable user-agents, and exponential backoff to respect rate limits and bypass blocks.
337
+
338
+ 1. **Fetch**: `insider_scanner.core.prices` requests adjusted daily price bars over a specified date range.
339
+ 2. **Persist**: Price bars and missing gaps are cached locally in the `price_history` SQLite table.
340
+ 3. **Visualize**: The **Analysis tab** merges local insider trades (`us_trades` and `congress_trades`) with the price history to provide an interactive visual overlay using `pyqtgraph`.
341
+ 4. **Resilience**: The backend parser gracefully handles missing Yahoo data, challenge pages, malformed timestamps, and invalid JSON structures.
342
+
343
+ ### Congress Tab — GUI Integration
344
+
345
+ The **Congress Scan** tab provides a full GUI workflow for scanning congressional financial disclosures:
346
+
347
+ - **Official selection**: searchable dropdown populated from `congress_members.json`, with an "All" option
348
+ - **Source checkboxes**: independently toggle House and Senate scrapers
349
+ - **Date range**: optional filing date filter
350
+ - **Filters**: trade type (Purchase/Sale/Exchange), minimum dollar amount, and committee-based sector filtering
351
+ - **Background scanning**: threaded execution with a progress bar and a stop button that cancels the scan
352
+ - **Results table**: sortable columns for filing date, trade date, official, chamber, ticker, asset, type, owner, amount range, and source
353
+ - **Detail panel**: double-click a row to see full details including official's committee sectors
354
+ - **Open Filing**: launches the original disclosure page (House PDF or Senate PTR) in browser
355
+ - **Save**: exports filtered results to CSV + JSON
356
+
357
+ ### SEC EDGAR Compliance
358
+
359
+ All EDGAR requests use a proper `User-Agent` header and are rate-limited to at most 10 requests/second, as required by SEC policy. The User-Agent is configurable via the `SEC_USER_AGENT` environment variable.
360
+
361
+ ---
362
+
363
+ ## Runtime Files
364
+
365
+ | Location within runtime paths | Description |
366
+ | ------ | ------------- |
367
+ | `insider_scanner.sqlite3` | Parsed trades, coverage intervals, refresh state, and schema version |
368
+ | `congress_members.json` | Editable Congress member list with committee assignments and sector mappings |
369
+ | `tickers_watchlist.txt` | Editable ticker watchlist |
370
+ | `eu_watchlist.txt` | Editable European ISIN watchlist |
371
+ | `house_disclosures/` | Cached House disclosure indexes and PDFs |
372
+ | `exports/scans/` | Generated CSV/JSON scan exports |
373
+ | platform cache directory | Transient EDGAR and scraper HTTP caches |
374
+
375
+ Packaged defaults are copied into the user data directory only when a file is
376
+ missing; existing user edits are never overwritten. `eu_watchlist.txt` stores
377
+ one 12-character ISIN per line (comments start with `#`).
378
+
379
+ The Congress member list is populated by `scripts/update_congress.py` and includes committee assignments and sector mappings derived from the [unitedstates/congress-legislators](https://github.com/unitedstates/congress-legislators) project.
380
+
381
+ ### Congress Data Model
382
+
383
+ Congress financial disclosures differ from standard insider trades. Instead of exact transaction values, they report dollar ranges (e.g. "$1,001 – $15,000"). The `CongressTrade` dataclass in `models.py` handles this with `amount_range` (original string), `amount_low` and `amount_high` (parsed floats), plus fields for `owner` (Self/Spouse/Dependent Child/Joint), `asset_description`, and `comment`.
384
+
385
+ ### Committee → Sector Mapping
386
+
387
+ Each federal legislator is assigned one or more sectors based on their committee assignments. Committees are mapped to sectors via keyword matching (e.g. "Armed Services" → Defense, "Financial Services" → Finance). The available sectors are: Defense, Energy, Finance, Technology, Healthcare, Industrials, and Other. The `sector` field is a list — for example, a member serving on both Armed Services and Financial Services is tagged as `["Defense", "Finance"]`. "Other" is only included when no higher-priority sector applies.
388
+
389
+ **Known limitation**: Financial disclosures of family members (spouses, children) are not publicly available in machine-readable form; obtaining them would require paid data services.
390
+
391
+ ---
392
+
393
+ ## Scripts
394
+
395
+ Standalone utility scripts live in `scripts/`.
396
+
397
+ ### `update_congress.py`
398
+
399
+ Fetches the current list of federal and (optionally) state legislators, enriches them with committee assignments and sector mappings, and writes them to the configured `congress_members.json` in the platform user data directory.
400
+
401
+ ```bash
402
+ # Federal only with committee enrichment (no API key needed)
403
+ python scripts/update_congress.py
404
+
405
+ # Federal + state legislators (requires free Open States API key)
406
+ OPENSTATES_API_KEY=your_key python scripts/update_congress.py --include-state
407
+
408
+ # Skip committee enrichment
409
+ python scripts/update_congress.py --no-committees
410
+
411
+ # Preview without saving
412
+ python scripts/update_congress.py --dry-run
413
+
414
+ # Custom output path
415
+ python scripts/update_congress.py --output /path/to/members.json
416
+ ```
417
+
418
+ Federal data and committee assignments come from the [unitedstates/congress-legislators](https://github.com/unitedstates/congress-legislators) project (public domain, community-maintained YAML). State data uses the [Open States API](https://v3.openstates.org) (free key required).
419
+
420
+ ---
421
+
422
+ ## Contributing
423
+
424
+ We welcome contributions! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for details on how to set up the development environment, run tests, and add new scraping sources.
425
+
426
+ ---
427
+
428
+ ## License
429
+
430
+ MIT
431
+
432
+ Bundled UI fonts are licensed separately under the SIL Open Font License 1.1:
433
+ [Inter](src/insider_scanner/resources/fonts/OFL-Inter.txt) and
434
+ [JetBrains Mono](src/insider_scanner/resources/fonts/OFL-JetBrainsMono.txt).
435
+
436
+ ---
437
+
438
+ *Created with Claude AI*