graflo 1.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of graflo might be problematic. Click here for more details.

Files changed (70) hide show
  1. graflo/README.md +18 -0
  2. graflo/__init__.py +70 -0
  3. graflo/architecture/__init__.py +38 -0
  4. graflo/architecture/actor.py +1276 -0
  5. graflo/architecture/actor_util.py +450 -0
  6. graflo/architecture/edge.py +418 -0
  7. graflo/architecture/onto.py +376 -0
  8. graflo/architecture/onto_sql.py +54 -0
  9. graflo/architecture/resource.py +163 -0
  10. graflo/architecture/schema.py +135 -0
  11. graflo/architecture/transform.py +292 -0
  12. graflo/architecture/util.py +89 -0
  13. graflo/architecture/vertex.py +562 -0
  14. graflo/caster.py +736 -0
  15. graflo/cli/__init__.py +14 -0
  16. graflo/cli/ingest.py +203 -0
  17. graflo/cli/manage_dbs.py +197 -0
  18. graflo/cli/plot_schema.py +132 -0
  19. graflo/cli/xml2json.py +93 -0
  20. graflo/data_source/__init__.py +48 -0
  21. graflo/data_source/api.py +339 -0
  22. graflo/data_source/base.py +95 -0
  23. graflo/data_source/factory.py +304 -0
  24. graflo/data_source/file.py +148 -0
  25. graflo/data_source/memory.py +70 -0
  26. graflo/data_source/registry.py +82 -0
  27. graflo/data_source/sql.py +183 -0
  28. graflo/db/__init__.py +44 -0
  29. graflo/db/arango/__init__.py +22 -0
  30. graflo/db/arango/conn.py +1025 -0
  31. graflo/db/arango/query.py +180 -0
  32. graflo/db/arango/util.py +88 -0
  33. graflo/db/conn.py +377 -0
  34. graflo/db/connection/__init__.py +6 -0
  35. graflo/db/connection/config_mapping.py +18 -0
  36. graflo/db/connection/onto.py +717 -0
  37. graflo/db/connection/wsgi.py +29 -0
  38. graflo/db/manager.py +119 -0
  39. graflo/db/neo4j/__init__.py +16 -0
  40. graflo/db/neo4j/conn.py +639 -0
  41. graflo/db/postgres/__init__.py +37 -0
  42. graflo/db/postgres/conn.py +948 -0
  43. graflo/db/postgres/fuzzy_matcher.py +281 -0
  44. graflo/db/postgres/heuristics.py +133 -0
  45. graflo/db/postgres/inference_utils.py +428 -0
  46. graflo/db/postgres/resource_mapping.py +273 -0
  47. graflo/db/postgres/schema_inference.py +372 -0
  48. graflo/db/postgres/types.py +148 -0
  49. graflo/db/postgres/util.py +87 -0
  50. graflo/db/tigergraph/__init__.py +9 -0
  51. graflo/db/tigergraph/conn.py +2365 -0
  52. graflo/db/tigergraph/onto.py +26 -0
  53. graflo/db/util.py +49 -0
  54. graflo/filter/__init__.py +21 -0
  55. graflo/filter/onto.py +525 -0
  56. graflo/logging.conf +22 -0
  57. graflo/onto.py +312 -0
  58. graflo/plot/__init__.py +17 -0
  59. graflo/plot/plotter.py +616 -0
  60. graflo/util/__init__.py +23 -0
  61. graflo/util/chunker.py +807 -0
  62. graflo/util/merge.py +150 -0
  63. graflo/util/misc.py +37 -0
  64. graflo/util/onto.py +422 -0
  65. graflo/util/transform.py +454 -0
  66. graflo-1.3.7.dist-info/METADATA +243 -0
  67. graflo-1.3.7.dist-info/RECORD +70 -0
  68. graflo-1.3.7.dist-info/WHEEL +4 -0
  69. graflo-1.3.7.dist-info/entry_points.txt +5 -0
  70. graflo-1.3.7.dist-info/licenses/LICENSE +126 -0
graflo/util/merge.py ADDED
@@ -0,0 +1,150 @@
1
+ """Document merging utilities.
2
+
3
+ This module provides functions for merging documents based on common index keys,
4
+ preserving order and handling both dict and VertexRep objects.
5
+
6
+ Key Functions:
7
+ - merge_doc_basis: Merge documents based on common index keys, preserving order
8
+
9
+ """
10
+
11
+ from typing import cast, overload
12
+
13
+ from graflo.architecture.onto import VertexRep
14
+
15
+
16
# Overloads give callers precise types: a list[dict] input yields list[dict],
# and a list[VertexRep] input yields list[VertexRep]. The implementation
# below accepts the union and narrows with cast().
@overload
def merge_doc_basis(
    docs: list[dict],
    index_keys: tuple[str, ...],
) -> list[dict]: ...


@overload
def merge_doc_basis(
    docs: list[VertexRep],
    index_keys: tuple[str, ...],
) -> list[VertexRep]: ...
28
+
29
+
30
def merge_doc_basis(
    docs: list[dict] | list[VertexRep],
    index_keys: tuple[str, ...],
) -> list[dict] | list[VertexRep]:
    """Merge documents based on common index keys, preserving order.

    This function merges documents that share common index key-value combinations,
    preserving the order of documents based on the first occurrence of each index
    key combination. Documents without index keys are merged into the closest
    preceding document with index keys. If no documents have index keys, all
    documents are merged into a single document.

    For VertexRep objects, the merge is performed on the `vertex` attribute, and
    `ctx` dicts are merged among merged VertexReps.

    Input documents are never mutated: merged output documents are shallow
    copies of the inputs.

    Args:
        docs: Homogeneous list of documents (all dict or all VertexRep) to merge
        index_keys: Tuple of key names to use for merging

    Returns:
        Merged documents in order of first occurrence (same type as input)
    """
    if not docs:
        return docs

    # The list is assumed homogeneous; probe the first element once.
    is_vertexrep = isinstance(docs[0], VertexRep)

    # Merged output, ordered by first occurrence of each index-key combination.
    merged_docs: list[dict | VertexRep] = []
    # Map from index tuple to position in merged_docs.
    index_to_position: dict[tuple, int] = {}
    # Documents without index keys, buffered until an ID doc absorbs them.
    pending_non_ids: list[dict | VertexRep] = []

    def get_index_tuple(doc: dict | VertexRep) -> tuple:
        """Extract the sorted (key, value) pairs identifying a document."""
        if is_vertexrep:
            assert isinstance(doc, VertexRep)
            data = doc.vertex
        else:
            assert isinstance(doc, dict)
            data = doc
        return tuple(sorted((k, v) for k, v in data.items() if k in index_keys))

    def has_index_keys(doc: dict | VertexRep) -> bool:
        """Check if a document carries any of the index keys."""
        if is_vertexrep:
            assert isinstance(doc, VertexRep)
            return any(k in doc.vertex for k in index_keys)
        assert isinstance(doc, dict)
        return any(k in doc for k in index_keys)

    def merge_doc(target: dict | VertexRep, source: dict | VertexRep) -> None:
        """Merge source into target in place (source values win on conflicts)."""
        if is_vertexrep:
            assert isinstance(target, VertexRep) and isinstance(source, VertexRep)
            target.vertex.update(source.vertex)
            target.ctx.update(source.ctx)
        else:
            assert isinstance(target, dict) and isinstance(source, dict)
            target.update(source)

    def copy_doc(doc: dict | VertexRep) -> dict | VertexRep:
        """Create a shallow copy of a document."""
        if is_vertexrep:
            assert isinstance(doc, VertexRep)
            return VertexRep(vertex=doc.vertex.copy(), ctx=doc.ctx.copy())
        assert isinstance(doc, dict)
        return doc.copy()

    def flush_pending(target: dict | VertexRep) -> None:
        """Merge all buffered non-ID docs into target and clear the buffer."""
        for pending in pending_non_ids:
            merge_doc(target, pending)
        pending_non_ids.clear()

    for doc in docs:
        if not has_index_keys(doc):
            # No index keys: buffer until we know which ID doc absorbs it.
            pending_non_ids.append(doc)
            continue

        index_tuple = get_index_tuple(doc)

        # Non-ID docs seen since the previous ID doc belong to that doc.
        if pending_non_ids and merged_docs:
            flush_pending(merged_docs[-1])

        if index_tuple in index_to_position:
            # Merge into the existing document for this index combination.
            merge_doc(merged_docs[index_to_position[index_tuple]], doc)
        else:
            # First occurrence of this index tuple: record a private copy.
            merged_docs.append(copy_doc(doc))
            index_to_position[index_tuple] = len(merged_docs) - 1

        # Leading non-ID docs (seen before any ID doc) are merged into the
        # first ID doc. Bug fix: merge them into our private copy rather
        # than mutating the caller's input document.
        if pending_non_ids:
            flush_pending(merged_docs[-1])

    # Handle any remaining non-ID documents at the end of the input.
    if pending_non_ids and merged_docs:
        flush_pending(merged_docs[-1])
    elif pending_non_ids:
        # No document had index keys: collapse everything into one document.
        merged_doc: dict | VertexRep
        if is_vertexrep:
            merged_doc = VertexRep(vertex={}, ctx={})
        else:
            merged_doc = {}
        flush_pending(merged_doc)
        merged_docs.append(merged_doc)

    # Homogeneous input guarantees a homogeneous result.
    return cast(list[dict] | list[VertexRep], merged_docs)
graflo/util/misc.py ADDED
@@ -0,0 +1,37 @@
1
+ """Miscellaneous utility functions.
2
+
3
+ This module provides various utility functions for data manipulation and processing.
4
+
5
+ Key Functions:
6
+ - sorted_dicts: Recursively sort dictionaries and lists for consistent ordering
7
+ """
8
+
9
+
10
def sorted_dicts(d):
    """Recursively sort dictionaries and lists for consistent ordering.

    This function normalizes nested data structures so that logically equal
    inputs compare equal regardless of original ordering:

    - Lists/tuples are recursed into element by element (tuples come back as
      lists, matching the sorted branches of the original implementation).
    - A list whose elements are all dicts with scalar values is sorted by the
      dicts' key-sorted item tuples, so the order is deterministic regardless
      of each dict's key insertion order.
    - A list of plain scalars is sorted when the elements are mutually
      comparable; otherwise it is left in its original order.
    - Dict values are recursed into; key order is preserved.
    - Non-collection values are returned unchanged.

    Args:
        d: Data structure to sort (dict, list, tuple, or other)

    Returns:
        The data structure with consistent, deterministic ordering

    Example:
        >>> sorted_dicts({"b": 2, "a": 1, "c": [3, 1, 2]})
        {'b': 2, 'a': 1, 'c': [1, 2, 3]}
    """
    if isinstance(d, (tuple, list)):
        # Normalize elements first so nested structures are also sorted.
        # (Bug fix: the previous version never recursed into list elements
        # and crashed with AttributeError when d[0] was not a dict.)
        items = [sorted_dicts(x) for x in d]
        if items and all(isinstance(x, dict) for x in items):
            if all(
                not isinstance(v, (list, tuple, dict))
                for x in items
                for v in x.values()
            ):
                # Key-sort each dict's items before comparing so the sort is
                # deterministic regardless of key insertion order.
                return sorted(items, key=lambda x: tuple(sorted(x.items())))
            return items
        if all(not isinstance(x, (list, tuple, dict)) for x in items):
            try:
                return sorted(items)
            except TypeError:
                # Mixed, non-comparable scalar types: keep original order.
                return items
        return items
    if isinstance(d, dict):
        return {k: sorted_dicts(v) for k, v in d.items()}

    return d
graflo/util/onto.py ADDED
@@ -0,0 +1,422 @@
1
+ """Utility ontology classes for resource patterns and configurations.
2
+
3
+ This module provides data classes for managing resource patterns (files and database tables)
4
+ and configurations used throughout the system. These classes support resource discovery,
5
+ pattern matching, and configuration management.
6
+
7
+ Key Components:
8
+ - ResourcePattern: Abstract base class for resource patterns
9
+ - FilePattern: Configuration for file pattern matching
10
+ - TablePattern: Configuration for database table pattern matching
11
+ - Patterns: Collection of named resource patterns with connection management
12
+ """
13
+
14
+ import abc
15
+ import dataclasses
16
+ import pathlib
17
+ import re
18
+ from typing import TYPE_CHECKING, Any
19
+
20
+ from graflo.onto import BaseDataclass, BaseEnum
21
+
22
+ if TYPE_CHECKING:
23
+ from graflo.db.connection.onto import PostgresConfig
24
+ else:
25
+ # Import at runtime for type evaluation
26
+ try:
27
+ from graflo.db.connection.onto import PostgresConfig
28
+ except ImportError:
29
+ PostgresConfig = Any # type: ignore
30
+
31
+
32
class ResourceType(BaseEnum):
    """Resource types for data sources.

    Resource types distinguish between different data source categories.
    File type detection (CSV, JSON, JSONL, Parquet, etc.) is handled
    automatically by the loader based on file extensions.

    Attributes:
        FILE: File-based data source (any format: CSV, JSON, JSONL, Parquet, etc.)
        SQL_TABLE: SQL database table (e.g., PostgreSQL table)
    """

    # Values are the string tags used when (de)serializing configuration.
    FILE = "file"
    SQL_TABLE = "sql_table"
46
+
47
+
48
@dataclasses.dataclass
class ResourcePattern(BaseDataclass, abc.ABC):
    """Abstract base class for resource patterns (files or tables).

    Provides common API for pattern matching and resource identification.
    All concrete pattern types inherit from this class.

    Attributes:
        resource_name: Name of the resource this pattern matches
    """

    # Logical name used to look this pattern up in a Patterns collection;
    # None until assigned by the owning configuration.
    resource_name: str | None = None

    @abc.abstractmethod
    def matches(self, resource_identifier: str) -> bool:
        """Check if pattern matches a resource identifier.

        Args:
            resource_identifier: Identifier to match (filename or table name)

        Returns:
            bool: True if pattern matches
        """
        pass

    @abc.abstractmethod
    def get_resource_type(self) -> ResourceType:
        """Get the type of resource this pattern matches.

        Returns:
            ResourceType: Resource type enum value
        """
        pass
81
+
82
+
83
@dataclasses.dataclass
class FilePattern(ResourcePattern):
    """Pattern that selects files by regex within a sub-path.

    Attributes:
        regex: Regular expression applied to filenames (no regex, no match)
        sub_path: Directory searched for matching files (default: "./")
        date_field: Name of the date field to filter on (for date-based filtering)
        date_filter: SQL-style date filter condition (e.g., "> '2020-10-10'")
        date_range_start: Start date for range filtering (e.g., "2015-11-11")
        date_range_days: Number of days after start date (used with date_range_start)
    """

    class _(BaseDataclass.Meta):
        tag = "file"

    regex: str | None = None
    sub_path: None | pathlib.Path = dataclasses.field(
        default_factory=lambda: pathlib.Path("./")
    )
    date_field: str | None = None
    date_filter: str | None = None
    date_range_start: str | None = None
    date_range_days: int | None = None

    def __post_init__(self):
        """Normalize sub_path and validate the date-filter configuration.

        Coerces sub_path (which may arrive as a plain string from config)
        into a pathlib.Path, then checks that the date-filtering parameters
        form a consistent set.
        """
        # Config files may supply sub_path as a string; coerce once here.
        if not isinstance(self.sub_path, pathlib.Path):
            self.sub_path = pathlib.Path(self.sub_path)
        assert self.sub_path is not None
        # Date filtering for files is not yet implemented, but the
        # parameters are validated eagerly so misconfiguration fails fast.
        wants_date_filtering = self.date_filter or self.date_range_start
        if wants_date_filtering and not self.date_field:
            raise ValueError(
                "date_field is required when using date_filter or date_range_start"
            )
        if self.date_range_days is not None and not self.date_range_start:
            raise ValueError("date_range_start is required when using date_range_days")

    def matches(self, filename: str) -> bool:
        """Return True when the configured regex matches the filename.

        Args:
            filename: Filename to test

        Returns:
            bool: True if the regex matches from the start of the filename;
            always False when no regex is configured
        """
        if self.regex is None:
            return False
        return re.match(self.regex, filename) is not None

    def get_resource_type(self) -> ResourceType:
        """Return ResourceType.FILE.

        A FilePattern always describes a FILE resource; the concrete file
        format (CSV, JSON, JSONL, Parquet, etc.) is detected downstream by
        the loader from the file extension.
        """
        return ResourceType.FILE
145
+
146
+
147
@dataclasses.dataclass
class TablePattern(ResourcePattern):
    """Pattern for matching database tables.

    Attributes:
        table_name: Exact table name or regex pattern
        schema_name: Schema name (optional, defaults to public)
        database: Database name (optional)
        date_field: Name of the date field to filter on (for date-based filtering)
        date_filter: SQL-style date filter condition (e.g., "> '2020-10-10'")
        date_range_start: Start date for range filtering (e.g., "2015-11-11")
        date_range_days: Number of days after start date (used with date_range_start)
    """

    class _(BaseDataclass.Meta):
        tag = "table"

    table_name: str = ""
    schema_name: str | None = None
    database: str | None = None
    date_field: str | None = None
    date_filter: str | None = None
    date_range_start: str | None = None
    date_range_days: int | None = None

    def __post_init__(self):
        """Validate table pattern after initialization.

        Raises:
            ValueError: If table_name is empty, or if the date-filtering
                parameters are inconsistent (filter without field, or
                range days without range start).
        """
        if not self.table_name:
            raise ValueError("table_name is required for TablePattern")
        # Validate date filtering parameters
        if (self.date_filter or self.date_range_start) and not self.date_field:
            raise ValueError(
                "date_field is required when using date_filter or date_range_start"
            )
        if self.date_range_days is not None and not self.date_range_start:
            raise ValueError("date_range_start is required when using date_range_days")

    def matches(self, table_identifier: str) -> bool:
        """Check if pattern matches a table name.

        Args:
            table_identifier: Table name to match (format: schema.table or just table)

        Returns:
            bool: True if pattern matches
        """
        if not self.table_name:
            return False

        # Compile regex pattern.
        # Heuristic: a name explicitly anchored with "^" or "$" is treated as
        # a user-supplied regex; any other name is escaped and matched
        # exactly, so regex metacharacters in plain table names are literal.
        if self.table_name.startswith("^") or self.table_name.endswith("$"):
            # Already a regex pattern
            pattern = re.compile(self.table_name)
        else:
            # Exact match pattern
            pattern = re.compile(f"^{re.escape(self.table_name)}$")

        # Check if table_identifier matches
        if pattern.match(table_identifier):
            return True

        # If schema_name is specified, also check schema.table format
        # (lets a pattern written as "schema.table" match a bare table name).
        if self.schema_name:
            full_name = f"{self.schema_name}.{table_identifier}"
            if pattern.match(full_name):
                return True

        return False

    def get_resource_type(self) -> ResourceType:
        """Get resource type (always SQL_TABLE for table patterns)."""
        return ResourceType.SQL_TABLE

    def build_where_clause(self) -> str:
        """Build SQL WHERE clause from date filtering parameters.

        NOTE(review): date_field / date_filter / date_range_start are
        interpolated directly into the SQL string. This assumes they come
        from trusted configuration, not end-user input — confirm before
        exposing them externally (SQL-injection risk).

        Returns:
            WHERE clause string (without the WHERE keyword) or empty string if no filters
        """
        conditions = []

        if self.date_field:
            if self.date_range_start and self.date_range_days is not None:
                # Range filtering: dt >= start_date AND dt < start_date + interval
                # Example: Ingest for k days after 2015-11-11
                conditions.append(
                    f"\"{self.date_field}\" >= '{self.date_range_start}'::date"
                )
                conditions.append(
                    f"\"{self.date_field}\" < '{self.date_range_start}'::date + INTERVAL '{self.date_range_days} days'"
                )
            elif self.date_filter:
                # Direct filter: dt > 2020-10-10 or dt > '2020-10-10'
                # The date_filter should include the operator and value
                # If value doesn't have quotes, add them
                filter_parts = self.date_filter.strip().split(None, 1)
                if len(filter_parts) == 2:
                    operator, value = filter_parts
                    # Add quotes if not already present and value looks like a date
                    if not (value.startswith("'") and value.endswith("'")):
                        # Check if it's a date-like string (YYYY-MM-DD format)
                        # Heuristic: exactly 10 chars with two dashes; other
                        # formats pass through unquoted.
                        if len(value) == 10 and value.count("-") == 2:
                            value = f"'{value}'"
                    conditions.append(f'"{self.date_field}" {operator} {value}')
                else:
                    # If format is unexpected, use as-is
                    conditions.append(f'"{self.date_field}" {self.date_filter}')

        if conditions:
            return " AND ".join(conditions)
        return ""
258
+
259
+
260
@dataclasses.dataclass
class Patterns(BaseDataclass):
    """Collection of named resource patterns with connection management.

    This class manages a collection of resource patterns (files or tables),
    each associated with a name. It efficiently handles PostgreSQL connections
    by grouping tables that share the same connection configuration.

    The constructor accepts:
    - resource_mapping: dict mapping resource_name -> (file_path or table_name)
    - postgres_connections: dict mapping config_key -> PostgresConfig
      where config_key identifies a connection configuration
    - postgres_tables: dict mapping table_name -> (config_key, schema_name, table_name)

    Attributes:
        patterns: Dictionary mapping resource names to ResourcePattern instances
        postgres_configs: Dictionary mapping (config_key, schema_name) to PostgresConfig
        postgres_table_configs: Dictionary mapping resource_name to (config_key, schema_name, table_name)
    """

    patterns: dict[str, TablePattern | FilePattern] = dataclasses.field(
        default_factory=dict
    )
    postgres_configs: dict[tuple[str, str | None], Any] = dataclasses.field(
        default_factory=dict, metadata={"exclude": True}
    )
    postgres_table_configs: dict[str, tuple[str, str | None, str]] = dataclasses.field(
        default_factory=dict, metadata={"exclude": True}
    )
    # Initialization parameters (not stored as fields, excluded from serialization)
    # Use Any for _postgres_connections to avoid type evaluation issues with dataclass_wizard
    _resource_mapping: dict[str, str | tuple[str, str]] | None = dataclasses.field(
        default=None, repr=False, compare=False, metadata={"exclude": True}
    )
    _postgres_connections: dict[str, Any] | None = dataclasses.field(
        default=None, repr=False, compare=False, metadata={"exclude": True}
    )
    _postgres_tables: dict[str, tuple[str, str | None, str]] | None = dataclasses.field(
        default=None, repr=False, compare=False, metadata={"exclude": True}
    )

    def __post_init__(self):
        """Initialize Patterns from resource mappings and PostgreSQL configurations.

        Populates `patterns`, `postgres_configs`, and `postgres_table_configs`
        from the private `_resource_mapping` / `_postgres_connections` /
        `_postgres_tables` init parameters, when provided.
        """
        # Store PostgreSQL connection configs, keyed by (config_key, schema).
        if self._postgres_connections:
            for config_key, config in self._postgres_connections.items():
                if config is not None:
                    schema_name = config.schema_name
                    self.postgres_configs[(config_key, schema_name)] = config

        # Process resource mappings: a str spec is a file path; a 2-tuple is
        # a (config_key, table_name) PostgreSQL reference.
        if self._resource_mapping:
            for resource_name, resource_spec in self._resource_mapping.items():
                if isinstance(resource_spec, str):
                    # File path - create FilePattern that matches exactly this
                    # filename (name escaped) inside its parent directory.
                    file_path = pathlib.Path(resource_spec)
                    pattern = FilePattern(
                        regex=f"^{re.escape(file_path.name)}$",
                        sub_path=file_path.parent,
                        resource_name=resource_name,
                    )
                    self.patterns[resource_name] = pattern
                elif isinstance(resource_spec, tuple) and len(resource_spec) == 2:
                    # (config_key, table_name) tuple - create TablePattern
                    config_key, table_name = resource_spec
                    # Find the schema_name from the config (None if the
                    # config_key has no registered connection).
                    config = (
                        self._postgres_connections.get(config_key)
                        if self._postgres_connections
                        else None
                    )
                    schema_name = config.schema_name if config else None

                    pattern = TablePattern(
                        table_name=table_name,
                        schema_name=schema_name,
                        resource_name=resource_name,
                    )
                    self.patterns[resource_name] = pattern
                    # Store the config mapping so the connection can be
                    # resolved later via get_postgres_config().
                    self.postgres_table_configs[resource_name] = (
                        config_key,
                        schema_name,
                        table_name,
                    )

        # Process explicit postgres_tables mapping; entries here may override
        # patterns of the same name created from _resource_mapping above.
        if self._postgres_tables:
            for table_name, (
                config_key,
                schema_name,
                actual_table_name,
            ) in self._postgres_tables.items():
                pattern = TablePattern(
                    table_name=actual_table_name,
                    schema_name=schema_name,
                    resource_name=table_name,
                )
                self.patterns[table_name] = pattern
                self.postgres_table_configs[table_name] = (
                    config_key,
                    schema_name,
                    actual_table_name,
                )

    def add_file_pattern(self, name: str, file_pattern: FilePattern):
        """Add a file pattern to the collection.

        Args:
            name: Name of the pattern
            file_pattern: FilePattern instance
        """
        self.patterns[name] = file_pattern

    def add_table_pattern(self, name: str, table_pattern: TablePattern):
        """Add a table pattern to the collection.

        Args:
            name: Name of the pattern
            table_pattern: TablePattern instance
        """
        self.patterns[name] = table_pattern

    def get_postgres_config(self, resource_name: str) -> Any:
        """Get PostgreSQL connection config for a resource.

        Args:
            resource_name: Name of the resource

        Returns:
            PostgresConfig if resource is a PostgreSQL table, None otherwise
        """
        if resource_name in self.postgres_table_configs:
            config_key, schema_name, _ = self.postgres_table_configs[resource_name]
            return self.postgres_configs.get((config_key, schema_name))
        return None

    def get_resource_type(self, resource_name: str) -> ResourceType | None:
        """Get the resource type for a resource name.

        Args:
            resource_name: Name of the resource

        Returns:
            ResourceType enum value or None if not found
        """
        if resource_name in self.patterns:
            return self.patterns[resource_name].get_resource_type()
        return None

    def get_table_info(self, resource_name: str) -> tuple[str, str | None] | None:
        """Get table name and schema for a PostgreSQL table resource.

        Args:
            resource_name: Name of the resource

        Returns:
            Tuple of (table_name, schema_name) or None if not a table resource
        """
        if resource_name in self.postgres_table_configs:
            _, schema_name, table_name = self.postgres_table_configs[resource_name]
            return (table_name, schema_name)
        return None