graflo 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of graflo might be problematic. Click here for more details.

Files changed (45) hide show
  1. graflo/README.md +18 -0
  2. graflo/__init__.py +39 -0
  3. graflo/architecture/__init__.py +37 -0
  4. graflo/architecture/actor.py +974 -0
  5. graflo/architecture/actor_util.py +425 -0
  6. graflo/architecture/edge.py +295 -0
  7. graflo/architecture/onto.py +374 -0
  8. graflo/architecture/resource.py +161 -0
  9. graflo/architecture/schema.py +136 -0
  10. graflo/architecture/transform.py +292 -0
  11. graflo/architecture/util.py +93 -0
  12. graflo/architecture/vertex.py +277 -0
  13. graflo/caster.py +409 -0
  14. graflo/cli/__init__.py +14 -0
  15. graflo/cli/ingest.py +144 -0
  16. graflo/cli/manage_dbs.py +193 -0
  17. graflo/cli/plot_schema.py +132 -0
  18. graflo/cli/xml2json.py +93 -0
  19. graflo/db/__init__.py +32 -0
  20. graflo/db/arango/__init__.py +16 -0
  21. graflo/db/arango/conn.py +734 -0
  22. graflo/db/arango/query.py +180 -0
  23. graflo/db/arango/util.py +88 -0
  24. graflo/db/connection.py +304 -0
  25. graflo/db/manager.py +104 -0
  26. graflo/db/neo4j/__init__.py +16 -0
  27. graflo/db/neo4j/conn.py +432 -0
  28. graflo/db/util.py +49 -0
  29. graflo/filter/__init__.py +21 -0
  30. graflo/filter/onto.py +400 -0
  31. graflo/logging.conf +22 -0
  32. graflo/onto.py +186 -0
  33. graflo/plot/__init__.py +17 -0
  34. graflo/plot/plotter.py +556 -0
  35. graflo/util/__init__.py +23 -0
  36. graflo/util/chunker.py +739 -0
  37. graflo/util/merge.py +148 -0
  38. graflo/util/misc.py +37 -0
  39. graflo/util/onto.py +63 -0
  40. graflo/util/transform.py +406 -0
  41. graflo-1.1.0.dist-info/METADATA +157 -0
  42. graflo-1.1.0.dist-info/RECORD +45 -0
  43. graflo-1.1.0.dist-info/WHEEL +4 -0
  44. graflo-1.1.0.dist-info/entry_points.txt +5 -0
  45. graflo-1.1.0.dist-info/licenses/LICENSE +126 -0
graflo/util/merge.py ADDED
@@ -0,0 +1,148 @@
1
+ """Document merging and discrimination utilities.
2
+
3
+ This module provides functions for merging and discriminating between documents
4
+ based on various criteria. It supports merging documents with common keys,
5
+ discriminating based on specific values, and handling different document structures.
6
+
7
+ Key Functions:
8
+ - discriminate_by_key: Filter documents based on index fields and key presence
9
+ - merge_doc_basis: Merge documents based on common index keys
10
+
11
+ """
12
+
13
+ from graflo.architecture.onto import VertexRep
14
+
15
+
16
def discriminate_by_key(items, indexes, discriminant_key, fast=False):
    """Select documents that carry at least one index field, optionally
    narrowed to those that also contain a discriminant key.

    Args:
        items: Documents (dicts) to examine.
        indexes: Index field names; a document qualifies when any is present.
        discriminant_key: When not None, keep only qualifying documents that
            also contain this key.
        fast: When True, stop scanning after the first discriminant match.

    Returns:
        list[dict]: The selected documents, in input order.
    """
    # keep only documents that carry at least one index field
    indexed = [doc for doc in items if any(field in doc for field in indexes)]

    if discriminant_key is None:
        return indexed

    matches = []
    for doc in indexed:
        if discriminant_key in doc:
            matches.append(doc)
            if fast:
                # caller only needs the first hit
                break
    return matches
44
+
45
+
46
def merge_doc_basis(
    docs: list[dict],
    index_keys: tuple[str, ...],
    discriminant_key=None,
) -> list[dict]:
    """Merge documents that share the same index key-value combination.

    Documents are grouped by the sorted tuple of their (key, value) pairs
    restricted to ``index_keys``; each group is folded into one dict, with
    later documents overwriting earlier ones on key collisions.  Documents
    carrying no index key at all (the empty-tuple group) are folded onto the
    first relevant document that has ``discriminant_key``, when one exists.

    Fix: the original built the group table from ``set(...)``, so the output
    order depended on per-run string-hash randomization; ``dict.fromkeys``
    makes it deterministic (first-appearance order).

    Note:
        Currently works best with two groups of documents: those with and
        without the discriminant key.

    Args:
        docs: Documents to merge.
        index_keys: Key names that define document identity.
        discriminant_key: Optional key used to pick the merge target for
            documents that have no index keys.

    Returns:
        list[dict]: One merged document per distinct index combination, in
        first-appearance order.
    """
    group_keys = [
        tuple(sorted((k, v) for k, v in doc.items() if k in index_keys))
        for doc in docs
    ]

    # dict.fromkeys preserves first-appearance order (deterministic output)
    bearing_docs: dict[tuple, dict] = {q: {} for q in dict.fromkeys(group_keys)}

    # merge docs with respect to unique index key-value combinations
    for doc, group_key in zip(docs, group_keys):
        bearing_docs[group_key].update(doc)

    # merge docs without any index keys onto the first relevant doc
    if () in bearing_docs:
        relevant_docs = discriminate_by_key(
            docs, index_keys, discriminant_key, fast=True
        )
        if relevant_docs:
            target_key = tuple(
                sorted((k, v) for k, v in relevant_docs[0].items() if k in index_keys)
            )
            bearing_docs[target_key].update(bearing_docs.pop(()))

    return list(bearing_docs.values())
94
+
95
+
96
def merge_doc_basis_closest_preceding(
    docs: list[VertexRep],
    index_keys: tuple[str, ...],
) -> list[VertexRep]:
    """Merge VertexReps onto their closest preceding ID-bearing VertexRep.

    A rep is "ID-bearing" when its ``vertex`` contains any of ``index_keys``.
    Non-ID reps accumulate and are folded into the nearest preceding ID rep;
    a run of leading non-ID reps is folded into a fresh, initially empty
    VertexRep placed before the first ID rep.  Both ``vertex`` and ``ctx``
    dicts are merged; the input reps are never mutated (ID reps are copied).
    When no ID-bearing rep exists at all, the result is empty.

    Args:
        docs: VertexReps to merge.
        index_keys: Keys whose presence in ``vertex`` marks an ID rep.

    Returns:
        list[VertexRep]: Merged VertexReps, in input order.
    """

    def _fold(target: VertexRep, buffered: list[VertexRep]) -> VertexRep:
        # Fold each buffered rep's vertex and ctx dicts into the target.
        for rep in buffered:
            target.vertex.update(rep.vertex)
            target.ctx.update(rep.ctx)
        return target

    merged: list[VertexRep] = []
    backlog: list[VertexRep] = []

    for rep in docs:
        if not any(key in rep.vertex for key in index_keys):
            # Non-ID rep: hold it until the next ID rep (or end of input).
            backlog.append(rep)
            continue

        if backlog:
            if merged:
                merged[-1] = _fold(merged[-1], backlog)
            else:
                # Leading non-IDs: seed a fresh empty rep to absorb them.
                merged.append(_fold(VertexRep(vertex={}, ctx={}), backlog))
            backlog = []

        # Copy the ID rep so the caller's objects are left untouched.
        merged.append(VertexRep(vertex=rep.vertex.copy(), ctx=rep.ctx.copy()))

    # Trailing non-IDs fold into the last ID-bearing rep, when one exists.
    if backlog and merged:
        merged[-1] = _fold(merged[-1], backlog)

    return merged
graflo/util/misc.py ADDED
@@ -0,0 +1,37 @@
1
+ """Miscellaneous utility functions.
2
+
3
+ This module provides various utility functions for data manipulation and processing.
4
+
5
+ Key Functions:
6
+ - sorted_dicts: Recursively sort dictionaries and lists for consistent ordering
7
+ """
8
+
9
+
10
def sorted_dicts(d):
    """Recursively order nested containers for reproducible comparison.

    Lists/tuples of flat dictionaries are sorted by their (sorted) item
    pairs; lists of plain scalars are sorted directly; dict values and
    nested list elements are processed recursively; everything else is
    returned unchanged.

    Fix: the original unconditionally evaluated ``d[0].values()`` for any
    non-empty list, raising AttributeError for lists whose elements are not
    dicts (including the docstring's own ``[3, 1, 2]`` example), and never
    recursed into nested list elements.

    Args:
        d: Data structure to sort (dict, list, tuple, or scalar).

    Returns:
        The structure with deterministically ordered collections.

    Example:
        >>> sorted_dicts({"b": 2, "a": 1, "c": [3, 1, 2]})
        {'b': 2, 'a': 1, 'c': [1, 2, 3]}
    """
    if isinstance(d, (tuple, list)):
        if not d:
            return d
        if all(isinstance(dd, dict) for dd in d) and all(
            not isinstance(v, (list, tuple, dict)) for v in d[0].values()
        ):
            # list of flat dicts: order by their sorted item pairs
            return sorted(d, key=lambda x: tuple(sorted(x.items())))
        if all(not isinstance(dd, (list, tuple, dict)) for dd in d):
            try:
                return sorted(d)
            except TypeError:
                # mixed, unorderable scalars: leave as-is
                return d
        # nested containers: sort each element independently
        return [sorted_dicts(dd) for dd in d]
    if isinstance(d, dict):
        return {
            k: sorted_dicts(v) if isinstance(v, (list, tuple, dict)) else v
            for k, v in d.items()
        }

    return d
graflo/util/onto.py ADDED
@@ -0,0 +1,63 @@
1
+ """Utility ontology classes for file patterns and configurations.
2
+
3
+ This module provides data classes for managing file patterns and configurations
4
+ used throughout the system. These classes support file discovery, pattern matching,
5
+ and configuration management.
6
+
7
+ Key Components:
8
+ - FilePattern: Configuration for file pattern matching
9
+ - Patterns: Collection of named file patterns
10
+ """
11
+
12
+ import dataclasses
13
+ import pathlib
14
+
15
+ from graflo.onto import BaseDataclass
16
+
17
+
18
@dataclasses.dataclass
class FilePattern(BaseDataclass):
    """Configuration for file pattern matching.

    Pairs an optional filename regular expression with the subdirectory
    to search in.

    Args:
        regex: Regular expression pattern for matching filenames, or None.
        sub_path: Directory to search for matching files (default: "./").

    Attributes:
        regex: Regular expression pattern.
        sub_path: Path to search in (always a pathlib.Path after init).

    Raises:
        TypeError: If ``sub_path`` is explicitly set to None.
    """

    regex: str | None = None
    sub_path: None | pathlib.Path = dataclasses.field(
        default_factory=lambda: pathlib.Path("./")
    )

    def __post_init__(self):
        """Coerce ``sub_path`` to a pathlib.Path and reject None.

        Fix: the original ended with ``assert self.sub_path is not None``,
        which was both unreachable (``pathlib.Path(None)`` raises first) and
        stripped under ``python -O``; an explicit TypeError replaces it.
        """
        if self.sub_path is None:
            raise TypeError("FilePattern.sub_path must not be None")
        if not isinstance(self.sub_path, pathlib.Path):
            self.sub_path = pathlib.Path(self.sub_path)
47
+
48
+
49
@dataclasses.dataclass
class Patterns(BaseDataclass):
    """Collection of named file patterns.

    Organizes multiple :class:`FilePattern` instances under human-readable
    names so callers can look up a pattern by name.

    Args:
        patterns: Dictionary mapping names to FilePattern instances.

    Attributes:
        patterns: Dictionary of named file patterns.
    """

    # default_factory keeps each instance's dict independent
    # (avoids the shared-mutable-default pitfall).
    patterns: dict[str, FilePattern] = dataclasses.field(default_factory=dict)
@@ -0,0 +1,406 @@
1
+ """Data transformation utilities for graph operations.
2
+
3
+ This module provides utility functions for transforming and standardizing data
4
+ in various formats, particularly for graph database operations. It includes
5
+ functions for date parsing, string standardization, and data cleaning.
6
+
7
+ Key Functions:
8
+ - standardize: Standardize string keys and names
9
+ - parse_date_*: Various date parsing functions for different formats
10
+ - cast_ibes_analyst: Parse and standardize analyst names
11
+ - clear_first_level_nones: Clean dictionaries by removing None values
12
+ - parse_multi_item: Parse complex multi-item strings
13
+ - pick_unique_dict: Remove duplicate dictionaries
14
+
15
+ Example:
16
+ >>> name = standardize("John. Doe, Smith")
17
+ >>> date = parse_date_standard("2023-01-01")
18
+ >>> analyst = cast_ibes_analyst("ADKINS/NARRA")
19
+ """
20
+
21
+ import json
22
+ import logging
23
+ import re
24
+ import time
25
+ from collections import defaultdict
26
+ from datetime import datetime
27
+
28
# Two-letter ordinal suffixes that may trail a day-of-month token (e.g. "24th").
ORDINAL_SUFFIX = ["st", "nd", "rd", "th"]

# Module-level logger, named after this module per the stdlib logging convention.
logger = logging.getLogger(__name__)
31
+
32
+
33
def standardize(k):
    """Normalize a name-like key: strip periods, then join parts with commas.

    The input is first stripped of "." characters.  If it contains ", ",
    the second comma-separated part has its spaces removed and the parts
    are joined with ","; otherwise the string is split on single spaces
    and re-joined with ",".

    Args:
        k (str): Input string to be standardized.

    Returns:
        str: Cleaned and standardized string.

    Example:
        >>> standardize("John Doe Smith")
        'John,Doe,Smith'
        >>> standardize("John. Doe, Smith")
        'John Doe,Smith'
    """
    cleaned = k.replace(".", "")
    parts = cleaned.split(", ")
    if len(parts) >= 2:
        # compact only the second comma-separated part
        parts[1] = parts[1].replace(" ", "")
    else:
        # no ", " separator: fall back to splitting on single spaces
        parts = parts[0].split(" ")
    return ",".join(parts)
58
+
59
+
60
def parse_date_standard(input_str):
    """Parse a YYYY-MM-DD date string into its numeric components.

    Args:
        input_str (str): Date string in YYYY-MM-DD format.

    Returns:
        tuple: (year, month, day) as integers.

    Raises:
        ValueError: If the string does not match YYYY-MM-DD.

    Example:
        >>> parse_date_standard("2023-01-01")
        (2023, 1, 1)
    """
    parsed = datetime.strptime(input_str, "%Y-%m-%d")
    return (parsed.year, parsed.month, parsed.day)
75
+
76
+
77
def parse_date_conf(input_str):
    """Parse a compact YYYYMMDD date string into its numeric components.

    Args:
        input_str (str): Date string in YYYYMMDD format.

    Returns:
        tuple: (year, month, day) as integers.

    Raises:
        ValueError: If the string does not match YYYYMMDD.

    Example:
        >>> parse_date_conf("20230101")
        (2023, 1, 1)
    """
    parsed = datetime.strptime(input_str, "%Y%m%d")
    return (parsed.year, parsed.month, parsed.day)
92
+
93
+
94
def parse_date_ibes(date0, time0):
    """Convert an IBES date and time to an ISO 8601 datetime string.

    Fix: the original interpolated ``time0`` verbatim, so single-digit
    hours (e.g. "9:35:52") produced non-ISO output like
    '2016-01-26T9:35:52Z'; time components are now zero-padded, matching
    the documented example.

    Args:
        date0 (str/int): Date in YYYYMMDD format.
        time0 (str): Time in H:MM:SS or HH:MM:SS format.

    Returns:
        str: Datetime in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ).

    Example:
        >>> parse_date_ibes(20160126, "9:35:52")
        '2016-01-26T09:35:52Z'
    """
    date0 = str(date0)
    year, month, day = date0[:4], date0[4:6], date0[6:]
    hour, minute, second = time0.split(":")
    # zero-pad each time component so the result is valid ISO 8601
    full_datetime = (
        f"{year}-{month}-{day}T{int(hour):02d}:{int(minute):02d}:{int(second):02d}Z"
    )

    return full_datetime
113
+
114
+
115
def parse_date_yahoo(date0):
    """Stamp a Yahoo Finance date with a fixed noon time, ISO 8601 style.

    Args:
        date0 (str): Date in YYYY-MM-DD format.

    Returns:
        str: Datetime string 'YYYY-MM-DDT12:00:00Z'.

    Example:
        >>> parse_date_yahoo("2023-01-01")
        '2023-01-01T12:00:00Z'
    """
    # noon is used as a neutral, timezone-agnostic time of day
    return date0 + "T12:00:00Z"
130
+
131
+
132
def round_str(x, **kwargs):
    """Convert a numeric string to float, then round it.

    Args:
        x (str): String representation of a number.
        **kwargs: Forwarded to built-in round() (e.g. ndigits).

    Returns:
        float: Rounded value (an int when ndigits is omitted, per round()).

    Example:
        >>> round_str("3.14159", ndigits=2)
        3.14
    """
    value = float(x)
    return round(value, **kwargs)
147
+
148
+
149
def parse_date_standard_to_epoch(input_str):
    """Convert a YYYY-MM-DD date string to a Unix epoch timestamp.

    Note:
        Uses time.mktime, so the result is relative to the host's LOCAL
        timezone; the value 1672531200.0 for "2023-01-01" holds only on a
        UTC host — TODO confirm callers expect local-time semantics.

    Args:
        input_str (str): Date string in YYYY-MM-DD format.

    Returns:
        float: Seconds since the epoch for local midnight of that date.
    """
    struct = datetime.strptime(input_str, "%Y-%m-%d").timetuple()
    return time.mktime(struct)
165
+
166
+
167
def cast_ibes_analyst(s):
    """Split an IBES analyst string into (last_name, first_initial).

    Whitespace-separated forms ("ARFSTROM J") take the first two tokens;
    slash-separated forms ("ADKINS/NARRA") take the first two segments,
    skipping the leading empty segment when the string starts with "/".

    Args:
        s (str): Analyst name string.

    Returns:
        tuple: (last_name, first_initial); the initial is "" when absent.

    Examples:
        >>> cast_ibes_analyst('ADKINS/NARRA')
        ('ADKINS', 'N')
        >>> cast_ibes_analyst('ARFSTROM J')
        ('ARFSTROM', 'J')
    """
    if " " in s or "\t" in s:
        tokens = s.split()[:2]
    else:
        segments = s.split("/")
        # a leading "/" yields an empty first segment; skip it
        tokens = segments[1:3] if s.startswith("/") else segments[:2]

    if len(tokens) < 2:
        return tokens[0], ""
    return tokens[0], tokens[1][:1]
200
+
201
+
202
def parse_date_reference(input_str):
    """Extract only the year from a human-written date reference.

    Delegates to _parse_date_reference; note that when parsing fails
    entirely, the returned "year" is the original string, not an int.

    Args:
        input_str (str): Date reference string.

    Returns:
        int: Year from the date reference (or the raw string on failure).

    Example:
        >>> parse_date_reference("1923, May 10")
        1923
    """
    parsed = _parse_date_reference(input_str)
    return parsed["year"]
216
+
217
+
218
+ def _parse_date_reference(input_str):
219
+ """Parse complex, human-written date references.
220
+
221
+ Handles various date formats like:
222
+ - "1923, May 10"
223
+ - "1923, July"
224
+ - "1921, Sept"
225
+ - "1935-36"
226
+ - "1926, December 24th"
227
+
228
+ Args:
229
+ input_str (str): Date string in various formats.
230
+
231
+ Returns:
232
+ dict: Parsed date information with keys 'year', optional 'month', 'day'.
233
+
234
+ Example:
235
+ >>> _parse_date_reference("1923, May 10")
236
+ {'year': 1923, 'month': 5, 'day': 10}
237
+ """
238
+ if "," in input_str:
239
+ if len(input_str.split(" ")) == 3:
240
+ if input_str[-2:] in ORDINAL_SUFFIX:
241
+ input_str = input_str[:-2]
242
+ try:
243
+ dt = datetime.strptime(input_str, "%Y, %B %d")
244
+ return {"year": dt.year, "month": dt.month, "day": dt.day}
245
+ except:
246
+ try:
247
+ aux = input_str.split(" ")
248
+ input_str = " ".join([aux[0]] + [aux[1][:3]] + [aux[2]])
249
+ dt = datetime.strptime(input_str, "%Y, %b %d")
250
+ return {"year": dt.year, "month": dt.month, "day": dt.day}
251
+ except:
252
+ return {"year": input_str}
253
+ else:
254
+ try:
255
+ dt = datetime.strptime(input_str, "%Y, %B")
256
+ return {"year": dt.year, "month": dt.month}
257
+ except:
258
+ try:
259
+ aux = input_str.split(" ")
260
+ input_str = " ".join([aux[0]] + [aux[1][:3]])
261
+ dt = datetime.strptime(input_str, "%Y, %b")
262
+ return {"year": dt.year, "month": dt.month}
263
+ except:
264
+ return {"year": input_str}
265
+ else:
266
+ try:
267
+ dt = datetime.strptime(input_str[:4], "%Y")
268
+ return {"year": dt.year}
269
+ except:
270
+ return {"year": input_str}
271
+
272
+
273
def try_int(x):
    """Attempt to convert a value to int, returning it unchanged on failure.

    Fix: narrowed the bare ``except:`` (which also swallowed
    KeyboardInterrupt/SystemExit) to the errors int() actually raises for
    bad input: ValueError (unparseable string) and TypeError (wrong type).

    Args:
        x: Value to convert.

    Returns:
        int or original value: Integer if conversion succeeds, the original
        value otherwise.

    Example:
        >>> try_int("123")
        123
        >>> try_int("abc")
        'abc'
    """
    try:
        return int(x)
    except (TypeError, ValueError):
        return x
293
+
294
+
295
def clear_first_level_nones(docs, keys_keep_nones=None):
    """Drop falsy first-level values from each dict, with key exceptions.

    Note: despite the name, ANY falsy value (None, 0, "", [], False) is
    removed — this matches the original truthiness check; keys listed in
    ``keys_keep_nones`` are always retained regardless of value.

    Fix: the original raised TypeError (``k in None``) whenever
    ``keys_keep_nones`` was left at its default.

    Args:
        docs (list): Dictionaries to clean.
        keys_keep_nones (list, optional): Keys kept even when falsy.

    Returns:
        list: Cleaned dictionaries.

    Example:
        >>> clear_first_level_nones(
        ...     [{"a": 1, "b": None}, {"a": None, "b": 2}], keys_keep_nones=["a"]
        ... )
        [{'a': 1}, {'a': None, 'b': 2}]
    """
    # () makes "k in keep" safely False when no exceptions were requested
    keep = keys_keep_nones if keys_keep_nones is not None else ()
    docs = [
        {k: v for k, v in tdict.items() if v or k in keep} for tdict in docs
    ]
    return docs
314
+
315
+
316
def parse_multi_item(s, mapper: dict, direct: list):
    """Parse a multi-item string into a defaultdict of parallel lists.

    Two input shapes are handled:
    - strings containing a single-quote character: every double- or
      single-quoted substring becomes one item;
    - otherwise: the content of the FIRST "[...]" group is taken and split
      on whitespace, each token becoming one item.

    Each item is split on "," and then on ":".  When every fragment is a
    "key:value" pair, values are routed through ``mapper`` (renaming keys)
    and ``direct`` (copied as-is), appending None for missing keys;
    otherwise the fragments are zipped positionally onto ``direct``.

    NOTE(review): the bracket branch only reads the first bracketed group
    and splits it on whitespace, so multi-bracket inputs like
    '[name: John] [name: Jane]' are NOT parsed group-by-group — TODO
    confirm the intended input format against callers.

    Args:
        s (str): Input string to parse.
        mapper (dict): Maps source key -> output key.
        direct (list): Keys copied through without renaming.

    Returns:
        defaultdict[str, list]: Parsed values, one list per output key.
    """
    if "'" in s:
        # quoted form: each quoted substring is one item
        items_str = re.findall(r"\"(.*?)\"", s) + re.findall(r"\'(.*?)\'", s)
    else:
        # remove brackets
        items_str = re.findall(r"\[([^]]+)", s)[0].split()
    r: defaultdict[str, list] = defaultdict(list)
    for item in items_str:
        # fragments: "k:v" pairs or bare tokens
        doc0 = [ss.strip().split(":") for ss in item.split(",")]
        if all([len(x) == 2 for x in doc0]):
            # keyed form: route through mapper, then copy direct keys
            doc0_dict = dict(doc0)
            for n_init, n_final in mapper.items():
                try:
                    r[n_final] += [doc0_dict[n_init]]
                except KeyError:
                    # missing source key -> placeholder keeps lists aligned
                    r[n_final] += [None]

            for n_final in direct:
                try:
                    r[n_final] += [doc0_dict[n_final]]
                except KeyError:
                    r[n_final] += [None]
        else:
            # positional form: pair fragments with the direct keys in order
            for key, value in zip(direct, doc0):
                r[key] += [value]

    return r
362
+
363
+
364
def pick_unique_dict(docs):
    """Remove duplicate dictionaries, keeping first-appearance order.

    Dictionaries are keyed by their canonical JSON serialization
    (``sort_keys=True``), so dicts that are equal up to key order collapse
    into one entry.

    Fix: the original collected the JSON strings in a ``set``, so the output
    order depended on per-run string-hash randomization; ``dict.fromkeys``
    preserves insertion order, making the result deterministic.

    Args:
        docs (list): List of JSON-serializable dictionaries.

    Returns:
        list: Unique dictionaries, in first-seen order.

    Example:
        >>> pick_unique_dict([{"a": 1}, {"a": 1}, {"b": 2}])
        [{'a': 1}, {'b': 2}]
    """
    canonical = dict.fromkeys(json.dumps(d, sort_keys=True) for d in docs)
    return [json.loads(t) for t in canonical]
383
+
384
+
385
def split_keep_part(s: str, sep="/", keep=-1) -> str:
    """Split a string on a separator and keep selected part(s).

    Args:
        s (str): String to split.
        sep (str): Separator to split on (default "/").
        keep (int | list): A single index, or a list of indices whose
            parts are re-joined with the separator.

    Returns:
        str: The kept part, or the kept parts joined by ``sep``.

    Raises:
        IndexError: If an index in ``keep`` is out of range.

    Example:
        >>> split_keep_part("a/b/c", keep=0)
        'a'
        >>> split_keep_part("a/b/c", keep=[0, 2])
        'a/c'
    """
    parts = s.split(sep)
    if not isinstance(keep, list):
        return parts[keep]
    return sep.join(parts[i] for i in keep)