lionagi 0.16.1-py3-none-any.whl → 0.16.3-py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (79)
  1. lionagi/adapters/_utils.py +0 -14
  2. lionagi/libs/file/save.py +8 -1
  3. lionagi/ln/__init__.py +10 -0
  4. lionagi/ln/_json_dump.py +322 -49
  5. lionagi/ln/fuzzy/__init__.py +4 -1
  6. lionagi/ln/fuzzy/_fuzzy_validate.py +109 -0
  7. lionagi/ln/fuzzy/_to_dict.py +388 -0
  8. lionagi/models/__init__.py +0 -2
  9. lionagi/operations/brainstorm/brainstorm.py +10 -10
  10. lionagi/operations/communicate/communicate.py +1 -1
  11. lionagi/operations/parse/parse.py +1 -1
  12. lionagi/protocols/generic/element.py +5 -14
  13. lionagi/protocols/generic/log.py +2 -2
  14. lionagi/protocols/generic/pile.py +1 -1
  15. lionagi/protocols/messages/message.py +8 -1
  16. lionagi/protocols/operatives/operative.py +2 -2
  17. lionagi/service/connections/endpoint.py +7 -0
  18. lionagi/service/connections/match_endpoint.py +2 -10
  19. lionagi/service/connections/providers/types.py +1 -3
  20. lionagi/service/hooks/hook_event.py +1 -1
  21. lionagi/service/hooks/hook_registry.py +1 -1
  22. lionagi/service/rate_limited_processor.py +1 -1
  23. lionagi/session/branch.py +1 -101
  24. lionagi/session/session.py +9 -14
  25. lionagi/utils.py +3 -334
  26. lionagi/version.py +1 -1
  27. {lionagi-0.16.1.dist-info → lionagi-0.16.3.dist-info}/METADATA +3 -13
  28. {lionagi-0.16.1.dist-info → lionagi-0.16.3.dist-info}/RECORD +30 -78
  29. lionagi/adapters/postgres_model_adapter.py +0 -131
  30. lionagi/libs/concurrency.py +0 -1
  31. lionagi/libs/file/params.py +0 -175
  32. lionagi/libs/nested/__init__.py +0 -3
  33. lionagi/libs/nested/flatten.py +0 -172
  34. lionagi/libs/nested/nfilter.py +0 -59
  35. lionagi/libs/nested/nget.py +0 -45
  36. lionagi/libs/nested/ninsert.py +0 -104
  37. lionagi/libs/nested/nmerge.py +0 -158
  38. lionagi/libs/nested/npop.py +0 -69
  39. lionagi/libs/nested/nset.py +0 -94
  40. lionagi/libs/nested/unflatten.py +0 -83
  41. lionagi/libs/nested/utils.py +0 -189
  42. lionagi/libs/parse.py +0 -31
  43. lionagi/libs/schema/json_schema.py +0 -231
  44. lionagi/libs/token_transform/__init__.py +0 -0
  45. lionagi/libs/token_transform/base.py +0 -54
  46. lionagi/libs/token_transform/llmlingua.py +0 -1
  47. lionagi/libs/token_transform/perplexity.py +0 -450
  48. lionagi/libs/token_transform/symbolic_compress_context.py +0 -152
  49. lionagi/libs/token_transform/synthlang.py +0 -9
  50. lionagi/libs/token_transform/synthlang_/base.py +0 -128
  51. lionagi/libs/token_transform/synthlang_/resources/frameworks/abstract_algebra.toml +0 -11
  52. lionagi/libs/token_transform/synthlang_/resources/frameworks/category_theory.toml +0 -11
  53. lionagi/libs/token_transform/synthlang_/resources/frameworks/complex_analysis.toml +0 -11
  54. lionagi/libs/token_transform/synthlang_/resources/frameworks/framework_options.json +0 -52
  55. lionagi/libs/token_transform/synthlang_/resources/frameworks/group_theory.toml +0 -11
  56. lionagi/libs/token_transform/synthlang_/resources/frameworks/math_logic.toml +0 -11
  57. lionagi/libs/token_transform/synthlang_/resources/frameworks/reflective_patterns.toml +0 -11
  58. lionagi/libs/token_transform/synthlang_/resources/frameworks/set_theory.toml +0 -11
  59. lionagi/libs/token_transform/synthlang_/resources/frameworks/topology_fundamentals.toml +0 -11
  60. lionagi/libs/token_transform/synthlang_/resources/mapping/lion_emoji_mapping.toml +0 -61
  61. lionagi/libs/token_transform/synthlang_/resources/mapping/python_math_mapping.toml +0 -41
  62. lionagi/libs/token_transform/synthlang_/resources/mapping/rust_chinese_mapping.toml +0 -60
  63. lionagi/libs/token_transform/synthlang_/resources/utility/base_synthlang_system_prompt.toml +0 -11
  64. lionagi/libs/token_transform/synthlang_/translate_to_synthlang.py +0 -140
  65. lionagi/libs/token_transform/types.py +0 -15
  66. lionagi/libs/unstructured/__init__.py +0 -0
  67. lionagi/libs/unstructured/pdf_to_image.py +0 -45
  68. lionagi/libs/unstructured/read_image_to_base64.py +0 -33
  69. lionagi/libs/validate/fuzzy_match_keys.py +0 -7
  70. lionagi/libs/validate/fuzzy_validate_mapping.py +0 -144
  71. lionagi/libs/validate/string_similarity.py +0 -7
  72. lionagi/libs/validate/xml_parser.py +0 -203
  73. lionagi/models/note.py +0 -383
  74. lionagi/operations/translate/__init__.py +0 -0
  75. lionagi/operations/translate/translate.py +0 -47
  76. lionagi/service/connections/providers/claude_code_.py +0 -294
  77. lionagi/tools/memory/tools.py +0 -495
  78. {lionagi-0.16.1.dist-info → lionagi-0.16.3.dist-info}/WHEEL +0 -0
  79. {lionagi-0.16.1.dist-info → lionagi-0.16.3.dist-info}/licenses/LICENSE +0 -0
lionagi/libs/token_transform/perplexity.py
@@ -1,450 +0,0 @@
- import asyncio
- from dataclasses import dataclass
- from timeit import default_timer as timer
-
- import numpy as np
- from pydantic import BaseModel
-
- from lionagi.ln import alcall, lcall
- from lionagi.protocols.generic.event import EventStatus
- from lionagi.protocols.generic.log import Log
- from lionagi.service.connections.api_calling import APICalling
- from lionagi.service.imodel import iModel
- from lionagi.utils import to_dict, to_list
-
-
- @dataclass
- class PerplexityScores:
-     """
-     Stores logprobs, tokens, and derived perplexity from a completion response.
-     """
-
-     completion_response: BaseModel | dict
-     original_tokens: list[str]
-     n_samples: int
-
-     @property
-     def logprobs(self) -> list[float]:
-         """Return list of logprobs extracted from the model response."""
-         return [i["logprob"] for i in self.perplexity_scores]
-
-     @property
-     def perplexity(self) -> float:
-         """
-         e^(mean logprob), if logprobs exist. Fallback to 1.0 if empty.
-         """
-         if not self.logprobs:
-             return 1.0
-         return np.exp(np.mean(self.logprobs))
-
-     @property
-     def perplexity_scores(self) -> list[dict]:
-         """
-         Return [{'token': ..., 'logprob': ...}, ...].
-         Handles two possible logprob structures:
-         - "tokens" + "token_logprobs"
-         - "content" (older style)
-         """
-         outs = []
-         try:
-             if isinstance(self.completion_response, dict):
-                 log_prob = self.completion_response["choices"][0]["logprobs"]
-             else:
-                 # Pydantic or other object
-                 log_prob = self.completion_response.choices[0].logprobs
-         except Exception:
-             return outs
-
-         if not log_prob:
-             return outs
-
-         if "tokens" in log_prob and "token_logprobs" in log_prob:
-             # OpenAI style logprobs
-             for token, lp in zip(
-                 log_prob["tokens"], log_prob["token_logprobs"]
-             ):
-                 outs.append({"token": token, "logprob": lp})
-         elif "content" in log_prob:
-             # Old style logprobs
-             for item in log_prob["content"]:
-                 outs.append(
-                     {"token": item["token"], "logprob": item["logprob"]}
-                 )
-         return outs
-
-     def to_dict(self) -> dict:
-         """
-         Construct a dictionary representation, including perplexity, usage, etc.
-         """
-         # usage info
-         usage = {}
-         if isinstance(self.completion_response, dict):
-             usage = self.completion_response.get("usage", {})
-         else:
-             usage = to_dict(self.completion_response.usage)
-
-         return {
-             "perplexity": self.perplexity,
-             "original_tokens": self.original_tokens,
-             "prompt_tokens": usage.get("prompt_tokens", 0),
-             "completion_tokens": usage.get("completion_tokens", 0),
-             "total_tokens": usage.get("total_tokens", 0),
-         }
-
-     def to_log(self) -> Log:
-         """
-         Return a Log object for convenience.
-         """
-         return Log(content=self.to_dict())
-
-
- async def compute_perplexity(
-     chat_model: iModel,
-     initial_context: str = None,
-     tokens: list[str] = None,
-     system: str = None,
-     n_samples: int = 1,
-     use_residue: bool = True,
-     **kwargs,
- ) -> list[PerplexityScores]:
-     """
-     Splits tokens into n_samples chunks, calls the model with logprobs=True,
-     and returns PerplexityScores for each chunk.
-     """
-     context = initial_context or ""
-     n_samples = n_samples or len(tokens)
-
-     sample_token_len, residue = divmod(len(tokens), n_samples)
-     if n_samples == 1:
-         samples = [tokens]
-     else:
-         samples = [
-             tokens[: (i + 1) * sample_token_len] for i in range(n_samples)
-         ]
-         if use_residue and residue != 0:
-             samples.append(tokens[-residue:])
-
-     # Build text for each chunk
-     sampless = [context + " " + " ".join(s) for s in samples]
-     kwargs["logprobs"] = True
-
-     async def _inner(api_call: APICalling):
-         await api_call.invoke()
-         elapsed = 0
-         while (
-             api_call.status not in [EventStatus.COMPLETED, EventStatus.FAILED]
-             and elapsed < 5
-         ):
-             await asyncio.sleep(0.1)
-             elapsed += 0.1
-         return api_call.response
-
-     # Create and schedule calls
-     api_calls = []
-     for sample_txt in sampless:
-         messages = []
-         if system:
-             if not chat_model.sequential_exchange:
-                 messages.append({"role": "system", "content": system})
-             messages.append({"role": "user", "content": sample_txt})
-         else:
-             messages.append({"role": "user", "content": sample_txt})
-
-         api_calls.append(
-             chat_model.create_api_calling(messages=messages, **kwargs)
-         )
-
-     results = await alcall(api_calls, _inner, max_concurrent=50)
-
-     def _pplx_score(input_):
-         idx, resp = input_
-         return PerplexityScores(resp, samples[idx], n_samples)
-
-     return lcall(enumerate(results), _pplx_score)
-
-
- class LLMCompressor:
-     """
-     Compress text by selecting segments with highest perplexity tokens
-     (or in practice, rank segments by logprob).
-     """
-
-     def __init__(
-         self,
-         chat_model: iModel,
-         system=None,
-         tokenizer=None,
-         splitter=None,
-         compression_ratio=0.2,
-         n_samples=5,
-         chunk_size=64,
-         max_tokens_per_sample=80,
-         min_pplx=0,
-         split_overlap=0,
-         split_threshold=0,
-         verbose=True,
-     ):
-         # Must have "logprobs" support
-         if "logprobs" not in chat_model.endpoint.acceptable_kwargs:
-             raise ValueError(
-                 f"Model {chat_model.model_name} does not support logprobs. "
-                 "Please use a model that supports logprobs."
-             )
-
-         self.chat_model = chat_model
-         self.tokenizer = tokenizer
-         self.splitter = splitter
-         self.system = system or "Concisely summarize content for storage:"
-         self.compression_ratio = compression_ratio
-         self.n_samples = n_samples
-         self.chunk_size = chunk_size
-         self.max_tokens_per_sample = max_tokens_per_sample
-         self.min_pplx = min_pplx
-         self.verbose = verbose
-         self.split_overlap = split_overlap
-         self.split_threshold = split_threshold
-
-     def tokenize(self, text: str, **kwargs) -> list[str]:
-         """
-         Tokenize text. If no custom tokenizer, use the default from lionagi.
-         """
-         if not self.tokenizer:
-             from lionagi.service.token_calculator import TokenCalculator
-
-             return TokenCalculator.tokenize(
-                 text,
-                 encoding_name=self.chat_model.model_name,
-                 return_tokens=True,
-             )
-         if hasattr(self.tokenizer, "tokenize"):
-             return self.tokenizer.tokenize(text, **kwargs)
-         return self.tokenizer(text, **kwargs)
-
-     def split(
-         self,
-         text: str,
-         chunk_size=None,
-         overlap=None,
-         threshold=None,
-         by_chars=False,
-         return_tokens=False,
-         **kwargs,
-     ) -> list:
-         """
-         Split text into segments. If no custom splitter, default to chunk_content from lionagi.
-         """
-         if not self.splitter:
-             from lionagi.libs.file.chunk import chunk_content
-
-             contents = chunk_content(
-                 content=text,
-                 chunk_size=chunk_size or self.chunk_size,
-                 overlap=overlap or self.split_overlap,
-                 threshold=threshold or self.split_threshold,
-                 return_tokens=return_tokens,
-                 chunk_by="chars" if by_chars else "tokens",
-             )
-             return [i["chunk_content"] for i in contents]
-
-         # If user provided an object with .split or .chunk or .segment
-         for meth in ["split", "chunk", "segment"]:
-             if hasattr(self.splitter, meth):
-                 return getattr(self.splitter, meth)(text, **kwargs)
-         raise ValueError(
-             "No valid method found in splitter: must have .split/.chunk/.segment"
-         )
-
-     async def rank_by_pplex(
-         self,
-         items: list,
-         initial_text=None,
-         cumulative=False,
-         n_samples=None,
-         use_residue=True,
-         **kwargs,
-     ) -> list:
-         """
-         Rank items (token lists or strings) by perplexity descending.
-         If cumulative=True, each item is appended to the context.
-         """
-
-         async def _get_item_perplexity(item):
-             # Ensure item is a list of tokens
-             item_toks = item if isinstance(item, list) else [item]
-             if len(item_toks) > self.max_tokens_per_sample:
-                 item_toks = item_toks[: self.max_tokens_per_sample]
-             pplex_scores = await compute_perplexity(
-                 chat_model=self.chat_model,
-                 initial_context=initial_text,
-                 tokens=item_toks,
-                 n_samples=n_samples or self.n_samples,
-                 system=self.system,
-                 use_residue=use_residue,
-                 **kwargs,
-             )
-             # Usually we only look at pplex_scores[0], as there's one chunk
-             return pplex_scores
-
-         # If user passed a single string, tokenize it
-         if isinstance(items, str):
-             items = self.tokenize(items)
-
-         if len(items) == 1:
-             single_scores = await _get_item_perplexity(items[0])
-             return [(items[0], single_scores[0])]
-
-         segments = []
-         if cumulative:
-             ctx = initial_text or ""
-             for i in items:
-                 seg_toks = i if isinstance(i, list) else [i]
-                 joined = " ".join(seg_toks)
-                 ctx += " " + joined
-                 segments.append(ctx)
-         else:
-             for i in items:
-                 seg_toks = i if isinstance(i, list) else [i]
-                 segments.append(" ".join(seg_toks))
-
-         tasks = [
-             asyncio.create_task(_get_item_perplexity(seg)) for seg in segments
-         ]
-         results = await asyncio.gather(*tasks)
-         # Pair each item with the first pplex (p[0]) if multiple were returned
-         pairs = [(itm, pplex[0]) for itm, pplex in zip(items, results)]
-
-         # Sort descending by perplexity
-         return sorted(pairs, key=lambda x: x[1].perplexity, reverse=True)
-
-     async def compress(
-         self,
-         text: str,
-         compression_ratio=None,
-         initial_text=None,
-         cumulative=False,
-         split_kwargs=None,
-         min_pplx=None,
-         **kwargs,
-     ) -> str:
-         """
-         Main method to compress text:
-         1) Split text
-         2) Rank by perplexity
-         3) Select best segments until reaching target ratio
-         """
-         start = timer()
-         if split_kwargs is None:
-             split_kwargs = {
-                 "chunk_size": self.max_tokens_per_sample,
-                 "overlap": self.split_overlap,
-                 "threshold": self.split_threshold,
-                 "return_tokens": True,
-             }
-
-         # Tokenize once to get total length
-         all_tokens = self.tokenize(text)
-         original_len = len(all_tokens)
-         ttl_chars = len(text)
-
-         # Split text
-         items = self.split(text, **split_kwargs)
-         # items -> list of token-lists
-
-         # Rank
-         ranked = await self.rank_by_pplex(
-             items=items,
-             initial_text=initial_text,
-             cumulative=cumulative,
-             **kwargs,
-         )
-
-         # Select
-         selected = self.select_by_pplex(
-             ranked_items=ranked,
-             target_compression_ratio=compression_ratio
-             or self.compression_ratio,
-             original_length=original_len,
-             min_pplx=min_pplx or self.min_pplx,
-         )
-
-         # Join final
-         out_str = " ".join(selected)
-
-         if self.verbose:
-             compressed_chars = len(out_str)
-             ratio = compressed_chars / ttl_chars if original_len else 1
-             msg = "------------------------------------------\n"
-             msg += f"Compression Method: Perplexity\n"
-             msg += f"Compressed Characters number: {compressed_chars}\n"
-             msg += f"Character Compression Ratio: {ratio:.1%}\n"
-             msg += f"Compression Time: {timer() - start:.3f}s\n"
-             msg += f"Compression Model: {self.chat_model.model_name}\n"
-             print(msg)
-
-         return out_str.strip()
-
-     def select_by_pplex(
-         self,
-         ranked_items: list,
-         target_compression_ratio: float,
-         original_length: int,
-         min_pplx=0,
-     ) -> list[str]:
-         """
-         From highest perplexity to lowest, pick items until we reach the desired ratio.
-         Items below min_pplx are skipped.
-         """
-         desired_len = int(original_length * target_compression_ratio)
-
-         chosen = []
-         current_len = 0
-         for item, info in ranked_items:
-             if info.perplexity > min_pplx:
-                 if isinstance(item, list):
-                     item_toks = to_list(item, dropna=True, flatten=True)
-                 else:
-                     item_toks = self.tokenize(item)
-                 if current_len + len(item_toks) > desired_len:
-                     break
-                 chosen.append(" ".join(item_toks))
-                 current_len += len(item_toks)
-
-         return chosen
-
-
- # Helper function to quickly compress text using perplexity
- # (If you don't want to manually create LLMCompressor instance everywhere)
- async def compress_text(
-     text: str,
-     chat_model: iModel,
-     system: str = None,
-     compression_ratio: float = 0.2,
-     n_samples: int = 5,
-     max_tokens_per_sample=80,
-     verbose=True,
-     initial_text=None,
-     cumulative=False,
-     split_kwargs=None,
-     min_pplx=None,
-     **kwargs,
- ) -> str:
-     """
-     Convenience function that instantiates LLMCompressor and compresses text.
-     """
-     compressor = LLMCompressor(
-         chat_model=chat_model,
-         system=system,
-         compression_ratio=compression_ratio,
-         n_samples=n_samples,
-         max_tokens_per_sample=max_tokens_per_sample,
-         verbose=verbose,
-     )
-     return await compressor.compress(
-         text,
-         compression_ratio=compression_ratio,
-         initial_text=initial_text,
-         cumulative=cumulative,
-         split_kwargs=split_kwargs,
-         min_pplx=min_pplx,
-         **kwargs,
-     )
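The removed module is compact enough to restate its core loop: score each chunk as `np.exp(np.mean(logprobs))` (the `PerplexityScores.perplexity` property above), then keep the highest-scoring chunks until a token budget derived from the compression ratio is exhausted (`select_by_pplex`). A minimal self-contained sketch of that selection logic, with made-up chunk texts and logprobs standing in for real API responses:

```python
import numpy as np

def perplexity(logprobs: list[float]) -> float:
    # e^(mean logprob); mirrors PerplexityScores.perplexity above
    return float(np.exp(np.mean(logprobs))) if logprobs else 1.0

# Hypothetical per-token logprobs for two chunks, standing in for
# what an OpenAI-style completion response would return
chunks = {
    "the quick brown fox": [-0.2, -0.1, -0.3, -0.2],
    "jumps over the lazy dog": [-1.9, -2.2, -1.4, -2.0, -1.8],
}
scores = {text: perplexity(lps) for text, lps in chunks.items()}

# Keep the highest-scoring chunks until ~50% of the original tokens
# are used, mirroring the budget loop in LLMCompressor.select_by_pplex
budget = int(sum(len(t.split()) for t in chunks) * 0.5)
kept, used = [], 0
for text, _score in sorted(scores.items(), key=lambda kv: kv[1], reverse=True):
    n = len(text.split())
    if used + n > budget:
        break
    kept.append(text)
    used += n
print(" ".join(kept))  # -> "the quick brown fox"
```

The real implementation layers batched API calls (`alcall` with `max_concurrent=50`), an optional cumulative context, and a `min_pplx` floor on top of this loop, and falls back to a score of 1.0 when a response carries no logprobs.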
lionagi/libs/token_transform/symbolic_compress_context.py
@@ -1,152 +0,0 @@
- from collections.abc import Callable
- from pathlib import Path
- from typing import Literal
-
- from lionagi.ln import alcall
- from lionagi.service.imodel import iModel
- from lionagi.session.branch import Branch
- from lionagi.utils import get_bins
-
- from .base import TokenMapping, TokenMappingTemplate
- from .synthlang_.base import SynthlangFramework, SynthlangTemplate
-
- FRAMEWORK_OPTIONS = SynthlangFramework.load_framework_options()
- FRAMEWORK_CHOICES = Literal["math", "optim", "custom_algebra"]
- DEFAULT_INVOKATION_PROMPT = (
-     "The light-speed brown fox jumps over the lazy dog with great agility."
- )
-
-
- async def symbolic_compress_context(
-     *,
-     text: str = None,
-     url_or_path: str | Path = None,
-     chunk_by="tokens",
-     chunk_size: int = 1000,
-     chunk_tokenizer: Callable = None,
-     threshold=50,
-     output_path: Path | str = None,
-     overlap=0.025,
-     system: str = None,
-     chat_model: iModel = None,
-     use_lion_system_message: bool = True,
-     max_concurrent=10,
-     throttle_period=1,
-     framework: Literal["synthlang"] = "synthlang",
-     framework_template: (
-         SynthlangTemplate | SynthlangFramework
-     ) = SynthlangTemplate.REFLECTIVE_PATTERNS,
-     framework_options: list[FRAMEWORK_CHOICES] = None,
-     compress: bool = False,
-     compress_model: iModel = None,
-     compression_ratio: float = 0.2,
-     compress_initial_text=None,
-     compress_cumulative=False,
-     compress_split_kwargs=None,
-     compress_min_pplx=None,
-     encode_token_map: TokenMappingTemplate | dict | TokenMapping = None,
-     num_encodings: int = 3,
-     encode_output: bool = True,
-     num_output_encodings: int = 1,
-     verbose: bool = True,
-     branch: Branch = None,
-     additional_text: str = "",
-     **kwargs,
- ):
-     if framework != "synthlang":
-         raise ValueError(f"Unsupported framework: {framework}")
-
-     if not text and not url_or_path:
-         raise ValueError("Either text or url_or_path must be provided.")
-
-     if text and url_or_path:
-         raise ValueError("Only one of text or url_or_path should be provided.")
-
-     from .synthlang_.translate_to_synthlang import translate_to_synthlang
-
-     async def _inner(text: str):
-         b_ = None
-         if branch:
-             b_ = await branch.aclone()
-         else:
-             b_ = Branch(
-                 system=system,
-                 use_lion_system_message=use_lion_system_message,
-                 chat_model=chat_model,
-             )
-
-         return await translate_to_synthlang(
-             text,
-             branch=b_,
-             framework_template=framework_template,
-             framework_options=framework_options,
-             compress=compress,
-             compress_model=compress_model,
-             compression_ratio=compression_ratio,
-             compress_kwargs={
-                 "initial_text": compress_initial_text,
-                 "cumulative": compress_cumulative,
-                 "split_kwargs": compress_split_kwargs,
-                 "min_pplx": compress_min_pplx,
-             },
-             encode_token_map=encode_token_map,
-             num_encodings=num_encodings,
-             encode_output=encode_output,
-             num_output_encodings=num_output_encodings,
-             verbose=verbose,
-             additional_text=additional_text,
-             **kwargs,
-         )
-
-     from lionagi.libs.file.process import chunk, chunk_content
-
-     chunks = []
-     if url_or_path:
-         chunks = chunk(
-             url_or_path=url_or_path,
-             chunk_by=chunk_by,
-             chunk_size=chunk_size,
-             overlap=overlap,
-             threshold=threshold,
-         )
-
-     elif text:
-         chunks = chunk_content(
-             text,
-             chunk_by=chunk_by,
-             chunk_size=chunk_size,
-             overlap=overlap,
-             threshold=threshold,
-             tokenizer=chunk_tokenizer or str.split,
-         )
-
-     texts = [str(i).strip() for i in chunks if str(i).strip()]
-     bins = get_bins(texts, upper=chunk_size)
-     textss = []
-     for i in bins:
-         textss.append("\n".join([texts[j] for j in i]))
-
-     results = await alcall(
-         textss,
-         _inner,
-         max_concurrent=max_concurrent,
-         retry_default=None,
-         retry_attempts=3,
-         retry_backoff=2,
-         retry_delay=1,
-         throttle_period=throttle_period,
-         output_flatten=True,
-         output_dropna=True,
-         output_unique=True,
-     )
-     text = "\n".join(results)
-     text = DEFAULT_INVOKATION_PROMPT + text
-
-     if output_path:
-         fp = Path(output_path)
-         fp.write_text(text)
-         if verbose:
-             print(f"Results of {len(text)} characters saved to: {fp}")
-
-         return fp
-     return text
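Before fanning chunks out to the model, the function above packs them into batches with `get_bins(texts, upper=chunk_size)` and joins each bin with newlines. A rough sketch of that kind of greedy binning; `get_bins_sketch` is a hypothetical stand-in, and the assumption that `upper` caps the combined character length of a bin is mine, not taken from the source:

```python
def get_bins_sketch(texts: list[str], upper: int) -> list[list[int]]:
    """Greedily group consecutive texts into bins (lists of indices)
    whose combined length stays at or under `upper`."""
    bins: list[list[int]] = []
    current: list[int] = []
    size = 0
    for idx, text in enumerate(texts):
        if current and size + len(text) > upper:
            bins.append(current)
            current, size = [], 0
        current.append(idx)
        size += len(text)
    if current:
        bins.append(current)
    return bins

texts = ["alpha " * 50, "beta " * 80, "gamma " * 30]
for bin_ in get_bins_sketch(texts, upper=500):
    batch = "\n".join(texts[j] for j in bin_)  # same join as the removed code
    print(bin_, len(batch))
```

Batching this way keeps each `translate_to_synthlang` call near the `chunk_size` budget instead of issuing one small request per chunk.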
lionagi/libs/token_transform/synthlang.py
@@ -1,9 +0,0 @@
- from .synthlang_.base import SynthlangFramework, SynthlangTemplate
- from .synthlang_.translate_to_synthlang import translate_to_synthlang
-
- # backwards compatibility
- __all__ = (
-     "translate_to_synthlang",
-     "SynthlangFramework",
-     "SynthlangTemplate",
- )