fabricatio 0.2.1.dev0__cp313-cp313-win_amd64.whl → 0.3.14.dev5__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. fabricatio/__init__.py +12 -20
  2. fabricatio/actions/__init__.py +1 -5
  3. fabricatio/actions/article.py +319 -0
  4. fabricatio/actions/article_rag.py +416 -0
  5. fabricatio/actions/fs.py +25 -0
  6. fabricatio/actions/output.py +248 -0
  7. fabricatio/actions/rag.py +96 -0
  8. fabricatio/actions/rules.py +83 -0
  9. fabricatio/capabilities/__init__.py +1 -0
  10. fabricatio/capabilities/advanced_judge.py +20 -0
  11. fabricatio/capabilities/advanced_rag.py +61 -0
  12. fabricatio/capabilities/censor.py +105 -0
  13. fabricatio/capabilities/check.py +212 -0
  14. fabricatio/capabilities/correct.py +228 -0
  15. fabricatio/capabilities/extract.py +74 -0
  16. fabricatio/capabilities/persist.py +103 -0
  17. fabricatio/capabilities/propose.py +65 -0
  18. fabricatio/capabilities/rag.py +263 -0
  19. fabricatio/capabilities/rating.py +404 -0
  20. fabricatio/capabilities/review.py +114 -0
  21. fabricatio/capabilities/task.py +113 -0
  22. fabricatio/decorators.py +251 -179
  23. fabricatio/{core.py → emitter.py} +31 -21
  24. fabricatio/fs/__init__.py +32 -2
  25. fabricatio/fs/curd.py +32 -9
  26. fabricatio/fs/readers.py +44 -7
  27. fabricatio/journal.py +3 -19
  28. fabricatio/models/action.py +185 -61
  29. fabricatio/models/adv_kwargs_types.py +63 -0
  30. fabricatio/models/extra/__init__.py +1 -0
  31. fabricatio/models/extra/advanced_judge.py +32 -0
  32. fabricatio/models/extra/aricle_rag.py +284 -0
  33. fabricatio/models/extra/article_base.py +422 -0
  34. fabricatio/models/extra/article_essence.py +101 -0
  35. fabricatio/models/extra/article_main.py +284 -0
  36. fabricatio/models/extra/article_outline.py +46 -0
  37. fabricatio/models/extra/article_proposal.py +52 -0
  38. fabricatio/models/extra/patches.py +20 -0
  39. fabricatio/models/extra/problem.py +165 -0
  40. fabricatio/models/extra/rag.py +98 -0
  41. fabricatio/models/extra/rule.py +52 -0
  42. fabricatio/models/generic.py +704 -36
  43. fabricatio/models/kwargs_types.py +112 -17
  44. fabricatio/models/role.py +74 -27
  45. fabricatio/models/task.py +94 -60
  46. fabricatio/models/tool.py +328 -188
  47. fabricatio/models/usages.py +791 -515
  48. fabricatio/parser.py +81 -60
  49. fabricatio/rust.cp313-win_amd64.pyd +0 -0
  50. fabricatio/rust.pyi +886 -0
  51. fabricatio/toolboxes/__init__.py +1 -3
  52. fabricatio/toolboxes/fs.py +17 -1
  53. fabricatio/utils.py +156 -0
  54. fabricatio/workflows/__init__.py +1 -0
  55. fabricatio/workflows/articles.py +24 -0
  56. fabricatio/workflows/rag.py +11 -0
  57. fabricatio-0.3.14.dev5.data/scripts/tdown.exe +0 -0
  58. fabricatio-0.3.14.dev5.data/scripts/ttm.exe +0 -0
  59. fabricatio-0.3.14.dev5.dist-info/METADATA +188 -0
  60. fabricatio-0.3.14.dev5.dist-info/RECORD +64 -0
  61. {fabricatio-0.2.1.dev0.dist-info → fabricatio-0.3.14.dev5.dist-info}/WHEEL +1 -1
  62. fabricatio/_rust.cp313-win_amd64.pyd +0 -0
  63. fabricatio/_rust.pyi +0 -53
  64. fabricatio/_rust_instances.py +0 -8
  65. fabricatio/actions/communication.py +0 -15
  66. fabricatio/actions/transmission.py +0 -23
  67. fabricatio/config.py +0 -263
  68. fabricatio/models/advanced.py +0 -128
  69. fabricatio/models/events.py +0 -82
  70. fabricatio/models/utils.py +0 -78
  71. fabricatio/toolboxes/task.py +0 -6
  72. fabricatio-0.2.1.dev0.data/scripts/tdown.exe +0 -0
  73. fabricatio-0.2.1.dev0.dist-info/METADATA +0 -420
  74. fabricatio-0.2.1.dev0.dist-info/RECORD +0 -35
  75. {fabricatio-0.2.1.dev0.dist-info → fabricatio-0.3.14.dev5.dist-info}/licenses/LICENSE +0 -0
@@ -1,42 +1,315 @@
1
- """This module defines generic classes for models in the Fabricatio library."""
1
+ """This module defines generic classes for models in the Fabricatio library, providing a foundation for various model functionalities."""
2
2
 
3
+ from abc import ABC, abstractmethod
3
4
  from pathlib import Path
4
- from typing import List, Self
5
-
6
- import orjson
7
- from fabricatio._rust import blake3_hash
8
- from fabricatio._rust_instances import template_manager
9
- from fabricatio.config import configs
10
- from fabricatio.fs.readers import magika, safe_text_read
5
+ from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Self, Sequence, Type, Union, final, overload
6
+
7
+ import ujson
8
+ from fabricatio.fs import dump_text
9
+ from fabricatio.fs.readers import safe_text_read
10
+ from fabricatio.journal import logger
11
+ from fabricatio.rust import CONFIG, TEMPLATE_MANAGER, blake3_hash, detect_language
12
+ from fabricatio.utils import ok
11
13
  from pydantic import (
12
14
  BaseModel,
13
15
  ConfigDict,
14
16
  Field,
17
+ NonNegativeFloat,
18
+ PositiveFloat,
19
+ PositiveInt,
20
+ PrivateAttr,
21
+ SecretStr,
15
22
  )
23
+ from pydantic.json_schema import GenerateJsonSchema, JsonSchemaValue
24
+
16
25
 
26
+ class Base(BaseModel, ABC):
27
+ """Base class for all models with Pydantic configuration.
17
28
 
18
- class Base(BaseModel):
19
- """Base class for all models with Pydantic configuration."""
29
+ This class sets up the basic Pydantic configuration for all models in the Fabricatio library.
30
+ The `model_config` uses `use_attribute_docstrings=True` to ensure field descriptions are
31
+ pulled from the attribute's docstring instead of the default Pydantic behavior.
32
+ """
20
33
 
21
34
  model_config = ConfigDict(use_attribute_docstrings=True)
22
35
 
23
36
 
24
- class Named(Base):
25
- """Class that includes a name attribute."""
37
class Display(Base, ABC):
    """Mixin that renders a model as JSON text, e.g. for logging or debugging.

    Every rendering serialises with ``by_alias=True`` so field aliases, rather than
    attribute names, appear in the output.
    """

    def display(self) -> str:
        """Return the model as pretty-printed JSON.

        Returns:
            str: JSON string indented by one space per nesting level.
        """
        return self.model_dump_json(indent=1, by_alias=True)

    def compact(self) -> str:
        """Return the model as JSON without indentation.

        Returns:
            str: Single-line JSON string.
        """
        return self.model_dump_json(by_alias=True)

    @staticmethod
    def seq_display(seq: Iterable["Display"], compact: bool = False) -> str:
        """Render several `Display` objects between start/end boundary markers.

        Args:
            seq (Iterable[Display]): Objects to render, in order.
            compact (bool): Use `compact()` for each item instead of `display()`.

        Returns:
            str: The start marker, one rendering per item and the end marker, each on
                its own line.
        """
        # Markers and items are joined uniformly so a marker never fuses with the
        # JSON of the first or last item.
        return "\n".join(
            (
                "--- Start of Extra Info Sequence ---",
                *(item.compact() if compact else item.display() for item in seq),
                "--- End of Extra Info Sequence ---",
            )
        )
26
76
 
27
- name: str = Field(frozen=True)
28
- """The name of the object."""
29
77
 
78
+ class Named(Base, ABC):
79
+ """Class that includes a name attribute.
30
80
 
31
- class Described(Base):
32
- """Class that includes a description attribute."""
81
+ This class adds a name attribute to models, which is intended to be a unique identifier.
82
+ """
33
83
 
34
- description: str = Field(default="", frozen=True)
35
- """The description of the object."""
84
+ name: str
85
+ """The name of this object,briefly and conclusively."""
36
86
 
37
87
 
38
- class WithBriefing(Named, Described):
39
- """Class that provides a briefing based on the name and description."""
88
class Described(Base, ABC):
    """Mixin contributing a required free-text ``description`` field to a model."""

    description: str
    """A comprehensive description of this object, including its purpose, scope, and context.
    This should clearly explain what this object is about, why it exists, and in what situations
    it applies. The description should be detailed enough to provide full understanding of
    this object's intent and application."""
99
+
100
+
101
class Titled(Base, ABC):
    """Mixin contributing a required ``title`` field to a model."""

    title: str
    """The title of this object, make it professional and concise.No prefixed heading number should be included."""
106
+
107
+
108
class WordCount(Base, ABC):
    """Mixin that carries an expected word count and a hook for the measured one."""

    expected_word_count: int
    """Expected word count of this research component."""

    @property
    def exact_word_count(self) -> int:
        """Return the measured word count of this component.

        This base implementation always raises.

        Raises:
            NotImplementedError: Always, naming the concrete class that lacks the property.
        """
        # The message names the property that is actually missing (`exact_word_count`);
        # `expected_word_count` is a regular field and is always available.
        raise NotImplementedError(f"`exact_word_count` is not implemented for {self.__class__.__name__}")
118
+
119
+
120
+ class FromMapping:
121
+ """Class that provides a method to generate a list of objects from a mapping."""
122
+
123
+ @classmethod
124
+ @abstractmethod
125
+ def from_mapping[S](cls: S, mapping: Mapping[str, Any], **kwargs: Any) -> List[S]:
126
+ """Generate a list of objects from a mapping."""
127
+
128
+
129
+ class FromSequence:
130
+ """Class that provides a method to generate a list of objects from a sequence."""
131
+
132
+ @classmethod
133
+ @abstractmethod
134
+ def from_sequence[S](cls: S, sequence: Sequence[Any], **kwargs: Any) -> List[S]:
135
+ """Generate a list of objects from a sequence."""
136
+
137
+
138
class AsPrompt:
    """Mixin that turns an object into a prompt string via the shared prompt template.

    Subclasses supply the template data through `_as_prompt_inner`; the rendering step
    in `as_prompt` is final.
    """

    @final
    def as_prompt(self) -> str:
        """Render this object as a prompt.

        Returns:
            str: Output of rendering `CONFIG.templates.as_prompt_template` with the data
                returned by `_as_prompt_inner`.
        """
        template_name = CONFIG.templates.as_prompt_template
        payload = self._as_prompt_inner()
        return TEMPLATE_MANAGER.render_template(template_name, payload)

    @abstractmethod
    def _as_prompt_inner(self) -> Dict[str, str]:
        """Provide the data handed to the prompt template.

        Subclasses implement this to expose whichever values should appear in the prompt.

        Returns:
            Dict[str, str]: Template data.
        """
165
+
166
+
167
+ class WithRef[T](Base, ABC):
168
+ """Class that provides a reference to another object.
169
+
170
+ This class manages a reference to another object, allowing for easy access and updates.
171
+ """
172
+
173
+ _reference: Optional[T] = PrivateAttr(None)
174
+
175
+ @property
176
+ def referenced(self) -> T:
177
+ """Get the referenced object.
178
+
179
+ Returns:
180
+ T: The referenced object.
181
+
182
+ Raises:
183
+ ValueError: If the reference is not set.
184
+ """
185
+ return ok(
186
+ self._reference, f"`{self.__class__.__name__}`'s `_reference` field is None. Have you called `update_ref`?"
187
+ )
188
+
189
+ @overload
190
+ def update_ref[S: WithRef](self: S, reference: T) -> S: ...
191
+
192
+ @overload
193
+ def update_ref[S: WithRef](self: S, reference: "WithRef[T]") -> S: ...
194
+
195
+ @overload
196
+ def update_ref[S: WithRef](self: S, reference: None = None) -> S: ...
197
+
198
+ def update_ref[S: WithRef](self: S, reference: Union[T, "WithRef[T]", None] = None) -> S:
199
+ """Update the reference of the object.
200
+
201
+ Args:
202
+ reference (Union[T, WithRef[T], None]): The new reference to set.
203
+
204
+ Returns:
205
+ S: The current instance with the updated reference.
206
+ """
207
+ if isinstance(reference, self.__class__):
208
+ self._reference = reference.referenced
209
+ else:
210
+ self._reference = reference # pyright: ignore [reportAttributeAccessIssue]
211
+ return self
212
+
213
+
214
class Language:
    """Mixin exposing the detected language of an object's textual fields."""

    @property
    def language(self) -> str:
        """Detect the language from the first non-empty textual field.

        Fields are tried in priority order: ``description`` (if the object is
        `Described`), then ``title`` (`Titled`), then ``name`` (`Named`).

        Returns:
            str: Result of `detect_language` on the chosen text.

        Raises:
            RuntimeError: If the object has none of these fields or all are empty.
        """
        probes = ((Described, "description"), (Titled, "title"), (Named, "name"))
        for kind, attr in probes:
            if isinstance(self, kind) and (text := getattr(self, attr)):
                return detect_language(text)
        raise RuntimeError(f"Cannot determine language! class that not support language: {self.__class__.__name__}")
227
+
228
+
229
class ModelHash(Base, ABC):
    """Mixin that makes a model hashable by its serialised JSON content."""

    def __hash__(self) -> int:
        """Hash the object through its `model_dump_json()` output.

        Returns:
            int: Hash of the JSON string produced for this instance.
        """
        serialized = self.model_dump_json()
        return hash(serialized)
242
+
243
+
244
+ class UpdateFrom(ABC):
245
+ """Class that provides a method to update the object from another object.
246
+
247
+ This class includes methods to update the current object with the attributes of another object.
248
+ """
249
+
250
+ def update_pre_check(self, other: Self) -> Self:
251
+ """Pre-check for updating the object from another object.
252
+
253
+ Args:
254
+ other (Self): The other object to update from.
255
+
256
+ Returns:
257
+ Self: The current instance after pre-check.
258
+
259
+ Raises:
260
+ TypeError: If the other object is not of the same type.
261
+ """
262
+ if not isinstance(other, self.__class__):
263
+ raise TypeError(f"Cannot update from a non-{self.__class__.__name__} instance.")
264
+
265
+ return self
266
+
267
+ @abstractmethod
268
+ def update_from_inner(self, other: Self) -> Self:
269
+ """Updates the current instance with the attributes of another instance.
270
+
271
+ This method should be implemented by subclasses to provide the specific update logic.
272
+
273
+ Args:
274
+ other (Self): The other instance to update from.
275
+
276
+ Returns:
277
+ Self: The current instance with updated attributes.
278
+ """
279
+
280
+ @final
281
+ def update_from(self, other: Self) -> Self:
282
+ """Updates the current instance with the attributes of another instance.
283
+
284
+ Args:
285
+ other (Self): The other instance to update from.
286
+
287
+ Returns:
288
+ Self: The current instance with updated attributes.
289
+ """
290
+ return self.update_pre_check(other).update_from_inner(other)
291
+
292
+
293
class Introspect(ABC):
    """Interface for objects able to describe their own internal state as text."""

    @abstractmethod
    def introspect(self) -> str:
        """Produce the object's introspection text.

        Returns:
            str: The introspection result supplied by the implementing class.
        """
306
+
307
+
308
+ class WithBriefing(Named, Described, ABC):
309
+ """Class that provides a briefing based on the name and description.
310
+
311
+ This class combines the name and description attributes to provide a brief summary of the object.
312
+ """
40
313
 
41
314
  @property
42
315
  def briefing(self) -> str:
@@ -48,24 +321,157 @@ class WithBriefing(Named, Described):
48
321
  return f"{self.name}: {self.description}" if self.description else self.name
49
322
 
50
323
 
51
- class WithJsonExample(Base):
52
- """Class that provides a JSON schema for the model."""
324
class UnsortGenerate(GenerateJsonSchema):
    """JSON-schema generator whose `sort` hook leaves values untouched.

    Overriding `sort` with a pass-through keeps whatever ordering the schema value
    already has instead of applying the parent's sorting.
    """

    def sort(self, value: JsonSchemaValue, parent_key: str | None = None) -> JsonSchemaValue:
        """Return ``value`` as-is, skipping any sorting.

        Args:
            value (JsonSchemaValue): Schema value that would normally be sorted.
            parent_key (str | None): Key under which ``value`` sits; ignored here.

        Returns:
            JsonSchemaValue: The very same ``value`` object.
        """
        return value
341
+
342
+
343
+ class WithFormatedJsonSchema(Base, ABC):
344
+ """Class that provides a formatted JSON schema of the model.
345
+
346
+ This class includes a method to generate a formatted JSON schema of the model.
347
+ """
53
348
 
54
349
  @classmethod
55
- def json_example(cls) -> str:
56
- """Return a JSON example for the model.
350
+ def formated_json_schema(cls) -> str:
351
+ """Get the JSON schema of the model in a formatted string.
57
352
 
58
353
  Returns:
59
- str: A JSON example for the model.
354
+ str: The JSON schema of the model in a formatted string.
60
355
  """
61
- return orjson.dumps(
62
- {field_name: field_info.description for field_name, field_info in cls.model_fields.items()},
63
- option=orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS,
64
- ).decode()
356
+ return ujson.dumps(
357
+ cls.model_json_schema(schema_generator=UnsortGenerate), indent=2, ensure_ascii=False, sort_keys=False
358
+ )
65
359
 
66
360
 
67
- class WithDependency(Base):
68
- """Class that manages file dependencies."""
361
class CreateJsonObjPrompt(WithFormatedJsonSchema, ABC):
    """Mixin that builds prompts asking for a JSON object matching this model's schema."""

    @classmethod
    @overload
    def create_json_prompt(cls, requirement: List[str]) -> List[str]: ...

    @classmethod
    @overload
    def create_json_prompt(cls, requirement: str) -> str: ...

    @classmethod
    def create_json_prompt(cls, requirement: str | List[str]) -> str | List[str]:
        """Build the prompt(s) for creating a JSON object that meets the requirement(s).

        Each prompt is rendered from `CONFIG.templates.create_json_obj_template` with the
        keys ``requirement`` and ``json_schema``.

        Args:
            requirement (str | List[str]): One requirement, or a list of them.

        Returns:
            str | List[str]: One prompt for a string input; otherwise one prompt per
                requirement, in input order.
        """
        # The schema and the template name do not depend on the individual requirement,
        # so they are computed once instead of once per list element.
        json_schema = cls.formated_json_schema()
        template = CONFIG.templates.create_json_obj_template

        def render(single: str) -> str:
            return TEMPLATE_MANAGER.render_template(
                template,
                {"requirement": single, "json_schema": json_schema},
            )

        if isinstance(requirement, str):
            return render(requirement)
        return [render(r) for r in requirement]
397
+
398
+
399
class InstantiateFromString(Base, ABC):
    """Mixin that builds an instance of the model from text containing JSON."""

    @classmethod
    def instantiate_from_string(cls, string: str) -> Self | None:
        """Try to create an instance from ``string``.

        The text is handed to `JsonCapture.convert_with` together with
        `cls.model_validate_json`; the outcome is logged at debug level.

        Args:
            string (str): Text to build the instance from.

        Returns:
            Self | None: Whatever `JsonCapture.convert_with` returns — the instance, or
                None on failure.
        """
        # Imported inside the function, as in the original.
        # NOTE(review): presumably this avoids a circular import — confirm.
        from fabricatio.parser import JsonCapture

        instance = JsonCapture.convert_with(string, cls.model_validate_json)
        outcome = "Failed" if instance is None else "Success"
        logger.debug(f"Instantiate `{cls.__name__}` from string, {outcome}.")
        return instance
420
+
421
+
422
class ProposedAble(CreateJsonObjPrompt, InstantiateFromString, ABC):
    """Model that can be proposed: prompt for it as JSON, then parse the reply.

    Combines `CreateJsonObjPrompt` (prompt construction from the schema) with
    `InstantiateFromString` (instantiation from a JSON-bearing string).
    """
427
+
428
+
429
class SketchedAble(ProposedAble, Display, ABC):
    """Model that can be sketched: a `ProposedAble` that can also be displayed.

    Adds the JSON rendering helpers of `Display` to the propose/instantiate abilities
    of `ProposedAble`.
    """
434
+
435
+
436
class ProposedUpdateAble(SketchedAble, UpdateFrom, ABC):
    """Model that can be updated in place from a proposed instance.

    Combines `SketchedAble` with the `UpdateFrom` update protocol.
    """
441
+
442
+
443
class FinalizedDumpAble(Base, ABC):
    """Mixin that produces the final textual dump of a model and can write it to disk."""

    def finalized_dump(self) -> str:
        """Produce the final dump of this object.

        Returns:
            str: JSON with one-space indentation, serialised by alias.
        """
        return self.model_dump_json(indent=1, by_alias=True)

    def finalized_dump_to(self, path: str | Path) -> Self:
        """Write the final dump to ``path`` using `dump_text`.

        Args:
            path (str | Path): Destination of the dump.

        Returns:
            Self: This instance, for chaining.
        """
        content = self.finalized_dump()
        dump_text(path, content)
        return self
468
+
469
+
470
+ class WithDependency(Base, ABC):
471
+ """Class that manages file dependencies.
472
+
473
+ This class includes methods to manage file dependencies required for reading or writing.
474
+ """
69
475
 
70
476
  dependencies: List[str] = Field(default_factory=list)
71
477
  """The file dependencies which is needed to read or write to meet a specific requirement, a list of file paths."""
@@ -108,17 +514,25 @@ class WithDependency(Base):
108
514
  self.dependencies.clear()
109
515
  return self
110
516
 
111
- def override_dependencies[P: str | Path](self, dependencies: List[P]) -> Self:
517
+ def override_dependencies[P: str | Path](self, dependencies: List[P] | P) -> Self:
112
518
  """Override the file dependencies of the task.
113
519
 
114
520
  Args:
115
- dependencies (List[str | Path]): The file dependencies to override the task's dependencies.
521
+ dependencies (List[str | Path] | str | Path): The file dependencies to override the task's dependencies.
116
522
 
117
523
  Returns:
118
524
  Self: The current instance of the task.
119
525
  """
120
526
  return self.clear_dependencies().add_dependency(dependencies)
121
527
 
528
+ def pop_dependence[T](self, idx: int = -1, reader: Callable[[str], T] = safe_text_read) -> T:
529
+ """Pop the file dependencies from the task.
530
+
531
+ Returns:
532
+ str: The popped file dependency
533
+ """
534
+ return reader(self.dependencies.pop(idx))
535
+
122
536
  @property
123
537
  def dependencies_prompt(self) -> str:
124
538
  """Generate a prompt for the task based on the file dependencies.
@@ -126,13 +540,15 @@ class WithDependency(Base):
126
540
  Returns:
127
541
  str: The generated prompt for the task.
128
542
  """
129
- return template_manager.render_template(
130
- configs.templates.dependencies_template,
543
+ from fabricatio.fs import MAGIKA
544
+
545
+ return TEMPLATE_MANAGER.render_template(
546
+ CONFIG.templates.dependencies_template,
131
547
  {
132
548
  (pth := Path(p)).name: {
133
549
  "path": pth.as_posix(),
134
550
  "exists": pth.exists(),
135
- "description": (identity := magika.identify_path(pth)).output.description,
551
+ "description": (identity := MAGIKA.identify_path(pth)).output.description,
136
552
  "size": f"{pth.stat().st_size / (1024 * 1024) if pth.exists() and pth.is_file() else 0:.3f} MB",
137
553
  "content": (text := safe_text_read(pth)),
138
554
  "lines": len(text.splitlines()),
@@ -142,3 +558,255 @@ class WithDependency(Base):
142
558
  for p in self.dependencies
143
559
  },
144
560
  )
561
+
562
+
563
class Vectorizable(ABC):
    """Mixin for objects that can be turned into a text chunk for embedding.

    Subclasses provide the text through `_prepare_vectorization_inner`; the length
    check in `prepare_vectorization` is final.
    """

    @abstractmethod
    def _prepare_vectorization_inner(self) -> str:
        """Return the text that represents this object for vectorization."""

    @final
    def prepare_vectorization(self, max_length: Optional[int] = None) -> str:
        """Return the vectorization text after checking its token length.

        Args:
            max_length (Optional[int]): Token limit. A falsy value falls back to
                `CONFIG.embedding.max_sequence_length`; if that is falsy too, no check
                is performed.

        Returns:
            str: The text from `_prepare_vectorization_inner`.

        Raises:
            ValueError: If the token count of the text exceeds the limit.
        """
        from litellm.utils import token_counter

        limit = max_length or CONFIG.embedding.max_sequence_length
        chunk = self._prepare_vectorization_inner()
        if not limit:
            # No limit configured anywhere: skip token counting entirely.
            return chunk

        length = token_counter(text=chunk)
        if length > limit:
            raise ValueError(f"Chunk exceeds maximum sequence length {limit}, got {length}, see \n{chunk}")
        return chunk
594
+
595
+
596
class ScopedConfig(Base, ABC):
    """Configuration holder with a hierarchical fallback mechanism.

    Holds optional LLM, embedding and Milvus settings. A value of None means "not set
    at this level"; `fallback_to` and `hold_to` fill such gaps from, or into, other
    `ScopedConfig` instances.
    """

    llm_api_endpoint: Optional[str] = None
    """The OpenAI API endpoint."""

    llm_api_key: Optional[SecretStr] = None
    """The OpenAI API key."""

    llm_timeout: Optional[PositiveInt] = None
    """The timeout of the LLM model."""

    llm_max_retries: Optional[PositiveInt] = None
    """The maximum number of retries."""

    llm_model: Optional[str] = None
    """The LLM model name."""

    llm_temperature: Optional[NonNegativeFloat] = None
    """The temperature of the LLM model."""

    llm_stop_sign: Optional[str | List[str]] = None
    """The stop sign of the LLM model."""

    llm_top_p: Optional[NonNegativeFloat] = None
    """The top p of the LLM model."""

    llm_generation_count: Optional[PositiveInt] = None
    """The number of generations to generate."""

    llm_stream: Optional[bool] = None
    """Whether to stream the LLM model's response."""

    llm_max_tokens: Optional[PositiveInt] = None
    """The maximum number of tokens to generate."""

    llm_tpm: Optional[PositiveInt] = None
    """The tokens per minute of the LLM model."""

    llm_rpm: Optional[PositiveInt] = None
    """The requests per minute of the LLM model."""

    llm_presence_penalty: Optional[PositiveFloat] = None
    """The presence penalty of the LLM model."""

    llm_frequency_penalty: Optional[PositiveFloat] = None
    """The frequency penalty of the LLM model."""

    embedding_api_endpoint: Optional[str] = None
    """The API endpoint of the embedding service."""

    embedding_api_key: Optional[SecretStr] = None
    """The API key of the embedding service."""

    embedding_timeout: Optional[PositiveInt] = None
    """The timeout of the embedding model."""

    embedding_model: Optional[str] = None
    """The embedding model name."""

    embedding_max_sequence_length: Optional[PositiveInt] = None
    """The maximum sequence length."""

    embedding_dimensions: Optional[PositiveInt] = None
    """The dimensions of the embedding."""

    # NOTE(review): this is the only field whose default is not None, so `fallback_to`
    # and `hold_to` always treat it as "set" on this instance — confirm that is intended.
    embedding_caching: Optional[bool] = False
    """Whether to cache the embedding result."""

    milvus_uri: Optional[str] = Field(default=None)
    """The URI of the Milvus server."""

    milvus_token: Optional[SecretStr] = Field(default=None)
    """The token for the Milvus server."""

    milvus_timeout: Optional[PositiveFloat] = Field(default=None)
    """The timeout for the Milvus server."""

    milvus_dimensions: Optional[PositiveInt] = Field(default=None)
    """The dimensions of the Milvus server."""

    @final
    def fallback_to(self, other: Union["ScopedConfig", Any]) -> Self:
        """Fill this instance's unset values from ``other``.

        For every `ScopedConfig` field that is None here and not None on ``other``, the
        value is copied from ``other``. Values already set here are never overwritten.

        Args:
            other (ScopedConfig | Any): Configuration to fall back to. Anything that is
                not a `ScopedConfig` is ignored.

        Returns:
            Self: This instance, for chaining.
        """
        if not isinstance(other, ScopedConfig):
            return self

        # noinspection PydanticTypeChecker,PyTypeChecker
        for attr_name in ScopedConfig.model_fields:
            if getattr(self, attr_name) is None and (attr := getattr(other, attr_name)) is not None:
                setattr(self, attr_name, attr)

        return self

    @final
    def hold_to(self, others: Union[Union["ScopedConfig", Any], Iterable[Union["ScopedConfig", Any]]]) -> Self:
        """Push this instance's set values into other configurations.

        For every target that is a `ScopedConfig`, each field that is not None here and
        None on the target is copied to the target. Non-`ScopedConfig` targets are skipped.

        Args:
            others (ScopedConfig | Iterable): One target or an iterable of targets.

        Returns:
            Self: This instance, unchanged.
        """
        # A single ScopedConfig must be recognised before the Iterable test: pydantic
        # models define `__iter__`, so they count as Iterable and would otherwise be
        # iterated as (field, value) pairs, turning the call into a silent no-op.
        if isinstance(others, ScopedConfig) or not isinstance(others, Iterable):
            others = [others]

        for other in (o for o in others if isinstance(o, ScopedConfig)):
            # noinspection PyTypeChecker,PydanticTypeChecker
            for attr_name in ScopedConfig.model_fields:
                if (attr := getattr(self, attr_name)) is not None and getattr(other, attr_name) is None:
                    setattr(other, attr_name, attr)
        return self
727
+
728
+
729
+ class Patch[T](ProposedAble, ABC):
730
+ """Base class for patches.
731
+
732
+ This class provides a base implementation for patches that can be applied to other objects.
733
+ """
734
+
735
+ def apply(self, other: T) -> T:
736
+ """Apply the patch to another instance.
737
+
738
+ Args:
739
+ other (T): The instance to apply the patch to.
740
+
741
+ Returns:
742
+ T: The instance with the patch applied.
743
+
744
+ Raises:
745
+ ValueError: If a field in the patch is not found in the target instance.
746
+ """
747
+ for field in self.__class__.model_fields:
748
+ if not hasattr(other, field):
749
+ raise ValueError(f"{field} not found in {other}, are you applying to the wrong type?")
750
+ setattr(other, field, getattr(self, field))
751
+ return other
752
+
753
+ def as_kwargs(self) -> Dict[str, Any]:
754
+ """Get the kwargs of the patch."""
755
+ return self.model_dump()
756
+
757
+ @staticmethod
758
+ def ref_cls() -> Optional[Type[BaseModel]]:
759
+ """Get the reference class of the model."""
760
+ return None
761
+
762
+ @classmethod
763
+ def formated_json_schema(cls) -> str:
764
+ """Get the JSON schema of the model in a formatted string.
765
+
766
+ Returns:
767
+ str: The JSON schema of the model in a formatted string.
768
+ """
769
+ my_schema = cls.model_json_schema(schema_generator=UnsortGenerate)
770
+
771
+ ref_cls = cls.ref_cls()
772
+ if ref_cls is not None:
773
+ # copy the desc info of each corresponding fields from `ref_cls`
774
+ for field_name in [f for f in cls.model_fields if f in ref_cls.model_fields]:
775
+ my_schema["properties"][field_name]["description"] = (
776
+ ref_cls.model_fields[field_name].description or my_schema["properties"][field_name]["description"]
777
+ )
778
+ my_schema["description"] = ref_cls.__doc__
779
+
780
+ return ujson.dumps(my_schema, indent=2, ensure_ascii=False, sort_keys=False)
781
+
782
+
783
+ class SequencePatch[T](ProposedUpdateAble, ABC):
784
+ """Base class for patches.
785
+
786
+ This class provides a base implementation for patches that can be applied to sequences of objects.
787
+ """
788
+
789
+ tweaked: List[T]
790
+ """Tweaked content list"""
791
+
792
+ def update_from_inner(self, other: Self) -> Self:
793
+ """Updates the current instance with the attributes of another instance.
794
+
795
+ Args:
796
+ other (Self): The other instance to update from.
797
+
798
+ Returns:
799
+ Self: The current instance with updated attributes.
800
+ """
801
+ self.tweaked.clear()
802
+ self.tweaked.extend(other.tweaked)
803
+ return self
804
+
805
+ @classmethod
806
+ def default(cls) -> Self:
807
+ """Defaults to empty list.
808
+
809
+ Returns:
810
+ Self: A new instance with an empty list of tweaks.
811
+ """
812
+ return cls(tweaked=[])