philoch-bib-sdk 0.4.2__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/Cargo.lock +1 -1
  2. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/Cargo.toml +1 -1
  3. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/PKG-INFO +3 -2
  4. philoch_bib_sdk-0.5.0/docs/generic-style-guide.md +648 -0
  5. philoch_bib_sdk-0.5.0/philoch_bib_sdk/adapters/api/__init__.py +642 -0
  6. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/adapters/io/csv/__init__.py +4 -7
  7. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/adapters/io/ods/__init__.py +7 -7
  8. philoch_bib_sdk-0.5.0/philoch_bib_sdk/converters/api/__init__.py +508 -0
  9. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/converters/plaintext/bibitem/bibkey_parser.py +14 -6
  10. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/converters/plaintext/bibitem/formatter.py +1 -3
  11. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/converters/plaintext/bibitem/parser.py +6 -6
  12. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/converters/plaintext/journal/formatter.py +3 -3
  13. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/converters/plaintext/shared/renderable_formatter.py +6 -4
  14. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/logic/functions/comparator.py +2 -2
  15. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/logic/functions/fuzzy_matcher.py +26 -43
  16. philoch_bib_sdk-0.5.0/philoch_bib_sdk/procedures/import_to_api.py +662 -0
  17. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/poetry.lock +139 -51
  18. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/pyproject.toml +22 -2
  19. philoch_bib_sdk-0.5.0/scripts/import_ods_to_api.py +140 -0
  20. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/converters/plaintext/conftest.py +4 -5
  21. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/converters/plaintext/test_bibitem_parser.py +31 -31
  22. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/converters/plaintext/test_bibkey_parser.py +2 -0
  23. philoch_bib_sdk-0.5.0/tests/data/test_import.csv +4 -0
  24. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/logic/functions/test_fuzzy_matcher.py +3 -8
  25. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/logic/test_default_models.py +0 -6
  26. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/logic/test_setup.py +0 -16
  27. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/LICENSE +0 -0
  28. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/README.md +0 -0
  29. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/docs/fuzzy-matching.md +0 -0
  30. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/docs/python-style-guide.md +0 -0
  31. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/docs/rust-implementation-summary.md +0 -0
  32. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/docs/rust-index-building-spec.md +0 -0
  33. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/docs/rust-scorer.md +0 -0
  34. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/docs/streaming-output.md +0 -0
  35. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/docs/todo/fuzzy-matching-enhanced-output.md +0 -0
  36. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/docs/todo/merge_fuzzy_results.py +0 -0
  37. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/docs/todo/rust-build-index-implementation-plan.md +0 -0
  38. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/__init__.py +0 -0
  39. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/_rust.pyi +0 -0
  40. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/adapters/io/__init__.py +0 -0
  41. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/adapters/plaintext/bibitem_reader.py +0 -0
  42. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/adapters/tabular_data/read_journal_volume_number_index.py +0 -0
  43. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/converters/latex.py +0 -0
  44. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/converters/plaintext/author/formatter.py +0 -0
  45. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/converters/plaintext/author/parser.py +0 -0
  46. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/converters/plaintext/bib_string_formatter.py +0 -0
  47. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/converters/plaintext/bibitem/bibkey_formatter.py +0 -0
  48. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/converters/plaintext/bibitem/date_formatter.py +0 -0
  49. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/converters/plaintext/bibitem/date_parser.py +0 -0
  50. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/converters/plaintext/bibitem/pages_formatter.py +0 -0
  51. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/converters/plaintext/bibitem/pages_parser.py +0 -0
  52. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/converters/plaintext/journal/parser.py +0 -0
  53. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/interfaces/cli/__init__.py +0 -0
  54. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/interfaces/cli/fuzzy_matching.py +0 -0
  55. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/logic/__init__.py +0 -0
  56. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/logic/default_models.py +0 -0
  57. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/logic/functions/__init__.py +0 -0
  58. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/logic/functions/journal_article_matcher.py +0 -0
  59. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/logic/literals.py +0 -0
  60. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/logic/models.py +0 -0
  61. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/logic/models_staging.py +0 -0
  62. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/procedures/fuzzy_matching.py +0 -0
  63. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/philoch_bib_sdk/py.typed +0 -0
  64. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/run_fuzzy_matching.py +0 -0
  65. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/run_fuzzy_matching_streaming.py +0 -0
  66. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/scripts/format.py +0 -0
  67. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/src/lib.rs +0 -0
  68. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/adapters/test_read_jvn_index_from_ods.py +0 -0
  69. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/converters/plaintext/test_author_formatter.py +0 -0
  70. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/converters/plaintext/test_author_parser.py +0 -0
  71. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/converters/plaintext/test_bibitem_formatter.py +0 -0
  72. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/converters/plaintext/test_bibkey_formatter.py +0 -0
  73. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/converters/plaintext/test_date_formatter.py +0 -0
  74. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/converters/plaintext/test_date_parser.py +0 -0
  75. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/converters/plaintext/test_journal_formatter.py +0 -0
  76. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/converters/plaintext/test_journal_parser.py +0 -0
  77. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/converters/plaintext/test_page_formatter.py +0 -0
  78. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/converters/plaintext/test_page_parser.py +0 -0
  79. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/logic/functions/test_comparator.py +0 -0
  80. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/logic/functions/test_journal_article_matcher.py +0 -0
  81. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/logic/test_models.py +0 -0
  82. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/processing/test_bulk_operation_styles.py +0 -0
  83. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/shared.py +0 -0
  84. {philoch_bib_sdk-0.4.2 → philoch_bib_sdk-0.5.0}/tests/test_tautology.py +0 -0
@@ -108,7 +108,7 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
108
108
 
109
109
  [[package]]
110
110
  name = "philoch_bib_sdk"
111
- version = "0.4.2"
111
+ version = "0.5.0"
112
112
  dependencies = [
113
113
  "ahash",
114
114
  "pyo3",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "philoch_bib_sdk"
3
- version = "0.4.2"
3
+ version = "0.5.0"
4
4
  edition = "2021"
5
5
  readme = "README.md"
6
6
 
@@ -1,11 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: philoch-bib-sdk
3
- Version: 0.4.2
4
- Requires-Dist: aletk>=0.1.6
3
+ Version: 0.5.0
4
+ Requires-Dist: aletk>=0.1.8
5
5
  Requires-Dist: attrs>=25.3.0
6
6
  Requires-Dist: polars>=1.32.3
7
7
  Requires-Dist: pydantic>=2.11.9
8
8
  Requires-Dist: cytoolz>=1.0.1
9
+ Requires-Dist: httpx>=0.28.0
9
10
  License-File: LICENSE
10
11
  Summary: Standard development kit for the Philosophie Bibliography project
11
12
  Author-email: Luis Alejandro Bordo García <luis.bordo@philosophie.ch>
@@ -0,0 +1,648 @@
1
+ # Python Style Guide
2
+
3
+ An opinionated set of Python standards centered on strict typing, immutability, and functional architecture.
4
+
5
+ ## Type Safety
6
+
7
+ Enforce strict type safety using mypy with the `--strict` flag.
8
+
9
+ ### Requirements
10
+
11
+ - All functions must have complete type annotations for parameters and return values
12
+ - No use of the `Any` type unless it is justified with a comment explaining why and has been peer-reviewed
13
+ - No use of `cast()` calls
14
+ - No use of `# type: ignore` comments unless each one is justified with a comment explaining why and has been peer-reviewed
15
+ - All type errors must be resolved properly through type narrowing or proper type design
16
+
17
+ ### Type Narrowing
18
+
19
+ When dealing with union types, use explicit type checking:
20
+
21
+ ```python
22
+ value = record.field
23
+ if isinstance(value, ExpectedType):
24
+ result = value.attribute # Type narrowed, safe to access
25
+ ```
26
+
27
+ ### Preserve Type Safety - Never Convert to Dicts
28
+
29
+ **CRITICAL**: Do not convert typed objects to dictionaries to access attributes. This loses all type safety.
30
+
31
+ ```python
32
+ # NEVER DO THIS - loses type safety
33
+ data = record.model_dump() # or __dict__ or dict(record)
34
+ name = data.get("name", "") # Type checker cannot verify this
35
+
36
+ # ALWAYS DO THIS - preserves type safety
37
+ name_field = record.name
38
+ if isinstance(name_field, NameType):
39
+ name = name_field.value
40
+ else:
41
+ name = ""
42
+ ```
43
+
44
+ Reasons:
45
+ - Dictionary access bypasses type checking completely
46
+ - Typos in keys are not caught by mypy
47
+ - Attribute renames do not update dictionary keys automatically
48
+ - Type narrowing is lost, leading to runtime errors
49
+
50
+ Always access attributes directly and use isinstance() for type narrowing.
51
+
52
+ ### Forward References
53
+
54
+ Use `TYPE_CHECKING` for imports that are only needed for type annotations to avoid circular imports:
55
+
56
+ ```python
57
+ from typing import TYPE_CHECKING
58
+
59
+ if TYPE_CHECKING:
60
+ from mypackage.models import SomeModel
61
+ ```
62
+
63
+ ## Pydantic at the Boundaries
64
+
65
+ Use Pydantic models exclusively at system boundaries - the primary side (user input, CLI arguments, configuration files) and the secondary side (API responses, database rows, external service payloads). Pydantic's validation overhead is justified here because this is where untrusted data enters the system.
66
+
67
+ ```python
68
+ # Primary side: parsing user input
69
+ class CreateUserRequest(BaseModel):
70
+ model_config = ConfigDict(strict=True)
71
+ name: str
72
+ email: EmailStr
73
+
74
+ # Secondary side: parsing an external API response
75
+ class ExternalPayload(BaseModel):
76
+ model_config = ConfigDict(strict=True)
77
+ id: int
78
+ status: str
79
+ ```
80
+
81
+ Once data crosses a boundary and is validated, convert it into lightweight internal representations (e.g., frozen attrs classes, named tuples, or plain typed values) for all further processing. Do not pass Pydantic models through core business logic:
82
+
83
+ ```python
84
+ # At the boundary: validate, then convert
85
+ request = CreateUserRequest.model_validate(raw_input)
86
+ user = User(name=request.name, email=request.email) # attrs/dataclass
87
+
88
+ # Inside the core: work with lightweight, validated data
89
+ def process_user(user: User) -> Result:
90
+ ...
91
+ ```
92
+
93
+ This approach gives you:
94
+ - **Fail-fast guarantees** - malformed data is rejected immediately at the edges
95
+ - **Runtime consistency** - everything inside the core is already validated
96
+ - **No hidden overhead** - Pydantic validation runs once, not on every function call
97
+ - **Clean separation** - boundary concerns (parsing, serialization) stay out of business logic
98
+
99
+ ## Data Structures and Idioms
100
+
101
+ The following patterns are preferred for immutability and clarity:
102
+
103
+ 1. **Prefer tuples over lists** for sequences that do not need mutation
104
+ ```python
105
+ items = tuple(process(x) for x in source) # Preferred
106
+ items = [process(x) for x in source] # Avoid
107
+ ```
108
+
109
+ 2. **Prefer `FrozenSet` over `Set`** for immutable unique collections
110
+
111
+ 3. **Prefer comprehensions over explicit loops** when the logic is straightforward
112
+ ```python
113
+ # Preferred
114
+ results = {key: frozenset(items) for key, items in mapping.items()}
115
+
116
+ # Avoid
117
+ results = {}
118
+ for key, items in mapping.items():
119
+ results[key] = frozenset(items)
120
+ ```
121
+
122
+ 4. **Use set operations** for collection operations
123
+ ```python
124
+ # Preferred
125
+ candidates.update(index[key])
126
+
127
+ # Avoid
128
+ for item in index[key]:
129
+ candidates.add(item)
130
+ ```
131
+
132
+ 5. **Use `frozen=True` and `slots=True`** on data classes for immutability and memory efficiency (e.g., `attrs.define(frozen=True, slots=True)` or `@dataclass(frozen=True, slots=True)`)
133
+
134
+ 6. **Use `Enum` or `StrEnum` for closed sets of values** — this enables exhaustive `match` checking via mypy's `exhaustive-match` error code
135
+ ```python
136
+ from enum import StrEnum
137
+
138
+ class Status(StrEnum):
139
+ ACTIVE = "active"
140
+ INACTIVE = "inactive"
141
+ PENDING = "pending"
142
+
143
+ def handle(status: Status) -> str:
144
+ match status:
145
+ case Status.ACTIVE: return "go"
146
+ case Status.INACTIVE: return "stop"
147
+ case Status.PENDING: return "wait"
148
+ # mypy error if a case is missing
149
+ ```
150
+
151
+ ## Performance
152
+
153
+ ### Avoid N+1 Problems
154
+
155
+ Never perform I/O inside a loop when a batch operation is available. This is the single most common performance mistake:
156
+
157
+ ```python
158
+ # NEVER DO THIS - N+1: one query per item
159
+ results = tuple(fetch(item_id) for item_id in item_ids)
160
+
161
+ # ALWAYS DO THIS - single batch call
162
+ results = batch_fetch(item_ids)
163
+ ```
164
+
165
+ The same applies to HTTP calls, file reads, and any other I/O. If you are calling an external service per item, look for a batch endpoint or gather inputs first.
166
+
167
+ ### Use Appropriate Data Structures for Lookups
168
+
169
+ Choose data structures based on access patterns:
170
+
171
+ ```python
172
+ # Membership testing: use a set, not a list
173
+ valid_ids: frozenset[int] = frozenset(load_valid_ids())
174
+ if item_id in valid_ids: # O(1)
175
+ ...
176
+
177
+ # Keyed access: use a dict, not linear search
178
+ users_by_id: dict[int, User] = {u.id: u for u in users}
179
+ user = users_by_id[target_id] # O(1)
180
+
181
+ # Avoid: scanning a list for every lookup — O(n) per call
182
+ user = next(u for u in users if u.id == target_id)
183
+ ```
184
+
185
+ ### Avoid Nested Loops over Large Collections
186
+
187
+ Nested iteration over two large collections is O(n*m). Restructure with index lookups:
188
+
189
+ ```python
190
+ # Avoid - O(n * m)
191
+ matched = tuple(
192
+ (o, p)
193
+ for o in orders
194
+ for p in products
195
+ if o.product_id == p.id
196
+ )
197
+
198
+ # Preferred - O(n + m): build an index, then join
199
+ products_by_id = {p.id: p for p in products}
200
+ matched = tuple(
201
+ (o, products_by_id[o.product_id])
202
+ for o in orders
203
+ if o.product_id in products_by_id
204
+ )
205
+ ```
206
+
207
+ ### Prefer Generators for Large Pipelines
208
+
209
+ When processing large datasets, use generator expressions to keep memory usage constant. Each item flows through the entire chain before the next is pulled:
210
+
211
+ ```python
212
+ # Constant memory - items processed one at a time
213
+ validated = (validate(item) for item in raw_items)
214
+ transformed = (transform(item) for item in validated)
215
+ write_output(transformed)
216
+
217
+ # Avoid - loads entire dataset into memory at each step
218
+ validated = [validate(item) for item in raw_items]
219
+ transformed = [transform(item) for item in validated]
220
+ write_output(transformed)
221
+ ```
222
+
223
+ See the [Streaming with Chained Generators](#streaming-with-chained-generators) subsection for the full pattern.
224
+
225
+ ### Profile Before Optimizing
226
+
227
+ Do not guess at bottlenecks. Measure first with `cProfile` or `line_profiler`, then optimize the hot path:
228
+
229
+ ```bash
230
+ python -m cProfile -s cumtime my_script.py
231
+ ```
232
+
233
+ ## Functional Architecture (Hexagonal / Ports & Adapters)
234
+
235
+ Follow hexagonal architecture principles using functional programming. The key insight: **hexagonal architecture doesn't require OOP** - function signatures serve as interfaces.
236
+
237
+ ### Core Principles
238
+
239
+ 1. **Business logic doesn't depend on I/O details**
240
+ 2. **Dependencies point inward** - concrete implementations depend on abstract interfaces
241
+ 3. **Ports define what you need** - type aliases for function signatures
242
+ 4. **Adapters provide how** - concrete implementations matching those signatures
243
+
244
+ ### Defining Ports (Abstract Interfaces)
245
+
246
+ Use type aliases to define the "shape" of functions your core logic needs:
247
+
248
+ ```python
249
+ from typing import Callable, Generator
250
+
251
+ # Port: what the core logic needs (abstract)
252
+ type TItemReader[ReaderIn] = Callable[[ReaderIn], Generator[Item, None, None]]
253
+ type TItemWriter[WriterIn] = Callable[[Generator[Item, None, None], WriterIn], None]
254
+ type TTransform = Callable[[str], str]
255
+ ```
256
+
257
+ The type signature **is** the contract. Any function matching that signature can be injected.
258
+
259
+ ### Implementing Adapters (Concrete Implementations)
260
+
261
+ Create concrete functions that satisfy the port signatures:
262
+
263
+ ```python
264
+ # Adapter: filesystem implementation
265
+ def read_from_filesystem(input_dirname: str) -> Generator[Item, None, None]:
266
+ for file_name in os.listdir(input_dirname):
267
+ yield read_file(os.path.join(input_dirname, file_name))
268
+
269
+ # Adapter: database implementation (alternative)
270
+ def read_from_database(connection_string: str) -> Generator[Item, None, None]:
271
+ # ... database-specific logic
272
+ ```
273
+
274
+ ### Abstract Process Functions
275
+
276
+ Write core logic that accepts injected functions:
277
+
278
+ ```python
279
+ def abstract_process[I, O](
280
+ reader: TItemReader[I],
281
+ reader_input: I,
282
+ transform: TTransform,
283
+ writer: TItemWriter[O],
284
+ writer_input: O,
285
+ ) -> None:
286
+ """Core business logic - knows nothing about filesystems, databases, etc."""
287
+ raw_items = reader(reader_input)
288
+ processed = (transform(item) for item in raw_items)
289
+ writer(processed, writer_input)
290
+ ```
291
+
292
+ ### Wiring: Injecting Dependencies
293
+
294
+ Create concrete entry points that wire everything together:
295
+
296
+ ```python
297
+ def main_filesystem(input_dir: str, output_dir: str) -> None:
298
+ """Concrete implementation using filesystem adapters."""
299
+ abstract_process(
300
+ reader=read_from_filesystem,
301
+ reader_input=input_dir,
302
+ transform=my_transform_function,
303
+ writer=write_to_filesystem,
304
+ writer_input=output_dir,
305
+ )
306
+ ```
307
+
308
+ ### Benefits Over OOP-Style Dependency Injection
309
+
310
+ | Aspect | FP Style | OOP Style |
311
+ |--------|----------|-----------|
312
+ | Interface definition | Type alias | Abstract class/Protocol |
313
+ | Boilerplate | Minimal | Class definitions, `__init__`, etc. |
314
+ | Testing | Pass mock functions directly | Mock objects, DI frameworks |
315
+ | Composition | Natural function composition | Decorator pattern, etc. |
316
+ | State | Explicit (parameters) | Hidden in `self` |
317
+
318
+ ### When to Use This Pattern
319
+
320
+ Use functional hexagonal architecture when:
321
+
322
+ - Processing pipelines (read → transform → write)
323
+ - Multiple I/O backends are possible (filesystem, database, API)
324
+ - Business logic should be testable in isolation
325
+ - You want to swap implementations without changing core logic
326
+
327
+ ### Example: Complete Module Structure
328
+
329
+ ```python
330
+ # types.py - Port definitions
331
+ type TValidate = Callable[[str], str]
332
+ type TSanitize = Callable[[str], str]
333
+ type TItemReader[In] = Callable[[In], Generator[Item, None, None]]
334
+ type TItemWriter[Out] = Callable[[Generator[Item, None, None], Out], None]
335
+
336
+ # core.py - Abstract process (pure business logic)
337
+ def process_content(
338
+ content: str,
339
+ validate: TValidate,
340
+ sanitize: TSanitize,
341
+ ) -> str:
342
+ return sanitize(validate(content))
343
+
344
+ def abstract_process[I, O](...) -> None:
345
+ # Orchestration logic
346
+
347
+ # adapters/filesystem.py - Filesystem adapter
348
+ def filesystem_reader(dirname: str) -> Generator[Item, None, None]: ...
349
+ def filesystem_writer(items: Generator[Item, None, None], dirname: str) -> None: ...
350
+
351
+ # adapters/transforms.py - Transform implementations
352
+ def regex_validate(content: str) -> str: ...
353
+ def html_sanitize(content: str) -> str: ...
354
+
355
+ # main.py - Wiring
356
+ def main_filesystem(input_dir: str, output_dir: str) -> None:
357
+ abstract_process(
358
+ reader=filesystem_reader,
359
+ reader_input=input_dir,
360
+ validate=regex_validate,
361
+ sanitize=html_sanitize,
362
+ writer=filesystem_writer,
363
+ writer_input=output_dir,
364
+ )
365
+ ```
366
+
367
+ ### Streaming with Chained Generators
368
+
369
+ This extends the `abstract_process` pattern shown above with multiple chained transformation steps. Each step is lazy - no intermediate lists are allocated - and only the terminal function at the end of the chain triggers evaluation and produces side effects:
370
+
371
+ ```python
372
+ from typing import Callable, Generator
373
+
374
+ type TReader[In] = Callable[[In], Generator[Item, None, None]]
375
+ type TTransform = Callable[[Item], Item]
376
+ type TToRow = Callable[[Item], str]
377
+ type TWriter[Out] = Callable[[Generator[str, None, None], Out], None]
378
+
379
+ def process_pipeline[I, O](
380
+ reader: TReader[I],
381
+ reader_input: I,
382
+ validate: TTransform,
383
+ transform: TTransform,
384
+ to_row: TToRow,
385
+ writer: TWriter[O],
386
+ writer_output: O,
387
+ ) -> None:
388
+ raw_items = reader(reader_input) # Generator[Item, None, None]
389
+ validated = (validate(item) for item in raw_items) # lazy
390
+ transformed = (transform(item) for item in validated) # lazy
391
+ rows = (to_row(item) for item in transformed) # lazy
392
+ writer(rows, writer_output) # terminal: consumes the chain
393
+ ```
394
+
395
+ The entire chain evaluates one item at a time, end to end, before pulling the next. This keeps memory usage constant regardless of input size. The terminal function (here `writer`) is what drives evaluation and is the only place where output side effects occur - the steps between `reader` and `writer` are pure transformations.
396
+
397
+ This composes naturally with the hexagonal architecture: `reader` and `writer` are adapters, `validate`, `transform`, and `to_row` are injected core functions, and `process_pipeline` is the wiring.
398
+
399
+ ## Code Organization
400
+
401
+ ### Module Structure
402
+
403
+ - **Types/Ports**: Type aliases defining function signatures (interfaces)
404
+ - **Core**: Abstract process functions that accept injected dependencies
405
+ - **Adapters**: Concrete implementations for I/O and transformations
406
+ - **Main/Wiring**: Entry points that wire adapters into core logic
407
+ - **Models**: Define data structures using frozen, slotted classes (e.g., `attrs.define(frozen=True, slots=True)`)
408
+ - **No classes** except for simple data containers and index structures
409
+
410
+ ### Function Design
411
+
412
+ Functions should be:
413
+
414
+ - **Pure** when possible (no side effects)
415
+ - **Small and focused** (single responsibility)
416
+ - **Composable** (easy to combine with other functions)
417
+ - **Injectable** (accept dependencies as parameters rather than importing them)
418
+
419
+ ### Imports
420
+
421
+ Group imports in the following order:
422
+
423
+ 1. Standard library imports
424
+ 2. Third-party library imports
425
+ 3. Local application imports
426
+
427
+ Within each group, sort alphabetically.
428
+
429
+ ### Logging over Print
430
+
431
+ Use the `logging` module for all output beyond throwaway debugging. `print` statements should not appear in committed code.
432
+
433
+ ```python
434
+ import logging
435
+
436
+ logger = logging.getLogger(__name__)
437
+
438
+ logger.info("Processing %d items", count)
439
+ logger.error("Failed to connect to %s", url)
440
+ ```
441
+
442
+ ## Testing
443
+
444
+ ### Test Requirements
445
+
446
+ - All new functionality must have corresponding tests
447
+ - Tests must pass with `pytest`
448
+ - Test coverage should be comprehensive
449
+ - Tests should be deterministic and fast
450
+
451
+ ### Test Structure
452
+
453
+ ```python
454
+ def test_feature_description() -> None:
455
+ """Brief description of what is being tested."""
456
+ # Arrange
457
+ input_data = create_test_data()
458
+
459
+ # Act
460
+ result = function_under_test(input_data)
461
+
462
+ # Assert
463
+ assert result == expected_value
464
+ ```
465
+
466
+ ### Parametrized Tests
467
+
468
+ Use `pytest.mark.parametrize` for testing multiple cases:
469
+
470
+ ```python
471
+ @pytest.mark.parametrize(
472
+ "input_value, expected_output",
473
+ [
474
+ (case1_input, case1_output),
475
+ (case2_input, case2_output),
476
+ ],
477
+ )
478
+ def test_multiple_cases(input_value: str, expected_output: str) -> None:
479
+ assert transform(input_value) == expected_output
480
+ ```
481
+
482
+ ## Documentation
483
+
484
+ ### Docstrings
485
+
486
+ All public functions and classes must have docstrings following this format:
487
+
488
+ ```python
489
+ def function_name(param1: Type1, param2: Type2) -> ReturnType:
490
+ """Brief one-line description.
491
+
492
+ Longer description if needed, explaining the purpose and behavior.
493
+
494
+ Args:
495
+ param1: Description of first parameter
496
+ param2: Description of second parameter
497
+
498
+ Returns:
499
+ Description of return value
500
+ """
501
+ ```
502
+
503
+ ### Comments
504
+
505
+ - Use comments sparingly - prefer self-documenting code
506
+ - Explain **why**, not **what** (the code shows what)
507
+ - Update comments when code changes
508
+
509
+ ## Formatting
510
+
511
+ ### General Style
512
+
513
+ - Follow PEP 8 conventions
514
+ - Line length: 88 characters (Black default)
515
+ - Use double quotes for strings
516
+ - Use trailing commas in multi-line structures
517
+
518
+ ### Function Signatures
519
+
520
+ For functions with many parameters, format each parameter on its own line:
521
+
522
+ ```python
523
+ def complex_function(
524
+ parameter1: Type1,
525
+ parameter2: Type2,
526
+ parameter3: Type3 = default_value,
527
+ ) -> ReturnType:
528
+ pass
529
+ ```
530
+
531
+ ## Error Handling
532
+
533
+ ### Type-Safe Error Handling
534
+
535
+ Handle expected errors explicitly:
536
+
537
+ ```python
538
+ # Check conditions and return early
539
+ if not valid_input(data):
540
+ return default_value
541
+
542
+ # Use isinstance for type narrowing
543
+ if isinstance(value, ExpectedType):
544
+ process(value)
545
+ ```
546
+
547
+ ### Avoid Bare Except
548
+
549
+ Always catch specific exceptions:
550
+
551
+ ```python
552
+ # Preferred
553
+ try:
554
+ risky_operation()
555
+ except ValueError as e:
556
+ handle_value_error(e)
557
+
558
+ # Avoid
559
+ try:
560
+ risky_operation()
561
+ except:
562
+ pass
563
+ ```
564
+
565
+ ## Version Control
566
+
567
+ ### Commit Messages
568
+
569
+ Follow conventional commits format:
570
+
571
+ ```
572
+ type(scope): brief description
573
+
574
+ Longer explanation if needed.
575
+ ```
576
+
577
+ Types: `feat`, `fix`, `refactor`, `test`, `docs`, `chore`
578
+
579
+ ## Tools
580
+
581
+ ### Required Tools
582
+
583
+ - `mypy` - Type checking with `--strict` mode
584
+ - `pytest` - Testing framework
585
+ - A dependency manager such as `poetry`, `uv`, or `pip`
586
+
587
+ ### Running Checks
588
+
589
+ ```bash
590
+ # Type checking
591
+ mypy .
592
+
593
+ # Run tests
594
+ pytest
595
+
596
+ # Run specific test file
597
+ pytest tests/path/to/test_file.py -v
598
+ ```
599
+
600
+ ### Suggested `pyproject.toml` Configuration
601
+
602
+ A strict mypy + Pydantic setup (works with any build backend — Poetry, uv, etc.):
603
+
604
+ ```toml
605
+ [tool.mypy]
606
+ python_version = "3.13"
607
+ strict = true
608
+ explicit_package_bases = true
609
+ warn_unreachable = true
610
+ disallow_any_explicit = true
611
+ disallow_any_unimported = true
612
+ disallow_any_decorated = true
613
+ enable_error_code = [
614
+ "possibly-undefined",
615
+ "redundant-expr",
616
+ "truthy-bool",
617
+ "truthy-iterable",
618
+ "exhaustive-match",
619
+ ]
620
+ mypy_path = "."
621
+ plugins = ["pydantic.mypy"]
622
+
623
+ [tool.pydantic-mypy]
624
+ init_forbid_extra = true
625
+ init_typed = true
626
+ warn_required_dynamic_aliases = true
627
+ warn_untyped_fields = true
628
+
629
+ # Override for third-party libraries that ship without type stubs.
630
+ # Add libraries here only when no stubs exist (check typeshed / pypi for *-stubs).
631
+ [[tool.mypy.overrides]]
632
+ module = [
633
+ "some_untyped_lib",
634
+ "another_untyped_lib",
635
+ ]
636
+ ignore_missing_imports = true
637
+ ```
638
+
639
+ ## Summary
640
+
641
+ This guide prioritizes:
642
+
643
+ 1. **Type safety** - Strict mypy compliance, with escape hatches (`Any`, `# type: ignore`) allowed only when justified and peer-reviewed
644
+ 2. **Immutability and clarity** - Tuples, frozensets, frozen data classes, comprehensions over loops
645
+ 3. **Performance** - Batch I/O, appropriate data structures, generators for constant memory
646
+ 4. **Testability** - Comprehensive test coverage with fast, deterministic tests
647
+
648
+ When in doubt, consult existing code in the project for examples of these patterns in practice.