abstract-utilities 0.2.2.442__py3-none-any.whl → 0.2.2.688__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of abstract-utilities might be problematic. Click here for more details.

Files changed (236) hide show
  1. abstract_utilities/__init__.py +25 -16
  2. abstract_utilities/circular_import_finder.py +222 -0
  3. abstract_utilities/circular_import_finder2.py +118 -0
  4. abstract_utilities/class_utils/__init__.py +7 -0
  5. abstract_utilities/class_utils/abstract_classes.py +144 -0
  6. abstract_utilities/class_utils/caller_utils.py +92 -0
  7. abstract_utilities/class_utils/class_utils.py +109 -0
  8. abstract_utilities/class_utils/function_utils.py +153 -0
  9. abstract_utilities/class_utils/global_utils.py +71 -0
  10. abstract_utilities/class_utils/imports/__init__.py +2 -0
  11. abstract_utilities/class_utils/imports/imports.py +2 -0
  12. abstract_utilities/class_utils/imports/utils.py +40 -0
  13. abstract_utilities/class_utils/module_utils.py +63 -0
  14. abstract_utilities/class_utils.py +0 -1
  15. abstract_utilities/directory_utils/__init__.py +2 -0
  16. abstract_utilities/directory_utils/directory_utils.py +94 -0
  17. abstract_utilities/directory_utils/imports/__init__.py +2 -0
  18. abstract_utilities/directory_utils/imports/imports.py +1 -0
  19. abstract_utilities/directory_utils/imports/module_imports.py +2 -0
  20. abstract_utilities/directory_utils/name_utils.py +43 -0
  21. abstract_utilities/directory_utils/size_utils.py +57 -0
  22. abstract_utilities/directory_utils/src/__init__.py +4 -0
  23. abstract_utilities/directory_utils/src/directory_utils.py +110 -0
  24. abstract_utilities/directory_utils/src/name_utils.py +43 -0
  25. abstract_utilities/directory_utils/src/size_utils.py +57 -0
  26. abstract_utilities/directory_utils/src/utils.py +116 -0
  27. abstract_utilities/directory_utils/utils.py +116 -0
  28. abstract_utilities/dynimport.py +1 -1
  29. abstract_utilities/env_utils/imports/imports.py +5 -3
  30. abstract_utilities/error_utils/__init__.py +2 -0
  31. abstract_utilities/error_utils/error_utils.py +25 -0
  32. abstract_utilities/error_utils/imports/__init__.py +2 -0
  33. abstract_utilities/error_utils/imports/imports.py +1 -0
  34. abstract_utilities/error_utils/imports/module_imports.py +1 -0
  35. abstract_utilities/file_utils/__init__.py +1 -2
  36. abstract_utilities/file_utils/file_utils/__init__.py +2 -0
  37. abstract_utilities/file_utils/file_utils/file_utils.py +3 -3
  38. abstract_utilities/file_utils/file_utils/find_collect.py +154 -0
  39. abstract_utilities/file_utils/file_utils/imports/__init__.py +3 -0
  40. abstract_utilities/file_utils/file_utils/imports/constants.py +39 -0
  41. abstract_utilities/file_utils/file_utils/imports/file_functions.py +10 -0
  42. abstract_utilities/file_utils/file_utils/imports/imports.py +39 -0
  43. abstract_utilities/file_utils/file_utils/imports/module_imports.py +14 -0
  44. abstract_utilities/file_utils/file_utils/imports.py +9 -0
  45. abstract_utilities/file_utils/file_utils/type_checks.py +92 -0
  46. abstract_utilities/file_utils/imports/__init__.py +1 -2
  47. abstract_utilities/file_utils/imports/classes.py +59 -55
  48. abstract_utilities/file_utils/imports/clean_imps.py +158 -0
  49. abstract_utilities/file_utils/imports/constants.py +84 -4
  50. abstract_utilities/file_utils/imports/file_functions.py +1 -1
  51. abstract_utilities/file_utils/imports/imports.py +40 -8
  52. abstract_utilities/file_utils/imports/module_imports.py +4 -5
  53. abstract_utilities/file_utils/module_imports.py +12 -0
  54. abstract_utilities/file_utils/src/__init__.py +7 -0
  55. abstract_utilities/file_utils/src/file_filters/__init__.py +1 -0
  56. abstract_utilities/file_utils/src/file_filters/ensure_utils.py +490 -0
  57. abstract_utilities/file_utils/src/file_filters/filter_params.py +150 -0
  58. abstract_utilities/file_utils/src/file_filters/filter_utils.py +78 -0
  59. abstract_utilities/file_utils/src/file_filters/predicate_utils.py +44 -0
  60. abstract_utilities/file_utils/src/file_filters.py +177 -0
  61. abstract_utilities/file_utils/src/file_reader.py +543 -0
  62. abstract_utilities/file_utils/src/file_utils.py +156 -0
  63. abstract_utilities/file_utils/src/filter_params.py +197 -0
  64. abstract_utilities/file_utils/src/find_collect.py +200 -0
  65. abstract_utilities/file_utils/src/find_content.py +210 -0
  66. abstract_utilities/file_utils/src/initFunctionsGen.py +293 -0
  67. abstract_utilities/file_utils/src/initFunctionsGens.py +280 -0
  68. abstract_utilities/file_utils/src/map_utils.py +29 -0
  69. abstract_utilities/file_utils/src/pdf_utils.py +300 -0
  70. abstract_utilities/file_utils/src/reader_utils/__init__.py +4 -0
  71. abstract_utilities/file_utils/src/reader_utils/directory_reader.py +53 -0
  72. abstract_utilities/file_utils/src/reader_utils/file_reader.py +543 -0
  73. abstract_utilities/file_utils/src/reader_utils/file_readers.py +376 -0
  74. abstract_utilities/file_utils/src/reader_utils/imports.py +18 -0
  75. abstract_utilities/file_utils/src/reader_utils/pdf_utils.py +300 -0
  76. abstract_utilities/file_utils/src/type_checks.py +91 -0
  77. abstract_utilities/file_utils (2)/__init__.py +2 -0
  78. abstract_utilities/file_utils (2)/imports/__init__.py +2 -0
  79. abstract_utilities/file_utils (2)/imports/constants.py +118 -0
  80. abstract_utilities/file_utils (2)/imports/imports/__init__.py +3 -0
  81. abstract_utilities/file_utils (2)/imports/imports/constants.py +119 -0
  82. abstract_utilities/file_utils (2)/imports/imports/imports.py +46 -0
  83. abstract_utilities/file_utils (2)/imports/imports/module_imports.py +8 -0
  84. abstract_utilities/file_utils (2)/imports/utils/__init__.py +3 -0
  85. abstract_utilities/file_utils (2)/imports/utils/classes.py +379 -0
  86. abstract_utilities/file_utils (2)/imports/utils/clean_imps.py +155 -0
  87. abstract_utilities/file_utils (2)/imports/utils/filter_utils.py +341 -0
  88. abstract_utilities/file_utils (2)/src/__init__.py +8 -0
  89. abstract_utilities/file_utils (2)/src/file_filters.py +155 -0
  90. abstract_utilities/file_utils (2)/src/file_reader.py +604 -0
  91. abstract_utilities/file_utils (2)/src/find_collect.py +258 -0
  92. abstract_utilities/file_utils (2)/src/initFunctionsGen.py +286 -0
  93. abstract_utilities/file_utils (2)/src/map_utils.py +28 -0
  94. abstract_utilities/file_utils (2)/src/pdf_utils.py +300 -0
  95. abstract_utilities/hash_utils/__init__.py +2 -0
  96. abstract_utilities/hash_utils/hash_utils.py +5 -0
  97. abstract_utilities/hash_utils/imports/__init__.py +2 -0
  98. abstract_utilities/hash_utils/imports/imports.py +1 -0
  99. abstract_utilities/hash_utils/imports/module_imports.py +0 -0
  100. abstract_utilities/history_utils/__init__.py +2 -0
  101. abstract_utilities/history_utils/history_utils.py +37 -0
  102. abstract_utilities/history_utils/imports/__init__.py +2 -0
  103. abstract_utilities/history_utils/imports/imports.py +1 -0
  104. abstract_utilities/history_utils/imports/module_imports.py +0 -0
  105. abstract_utilities/import_utils/__init__.py +2 -0
  106. abstract_utilities/import_utils/circular_import_finder.py +222 -0
  107. abstract_utilities/import_utils/circular_import_finder2.py +118 -0
  108. abstract_utilities/import_utils/imports/__init__.py +4 -0
  109. abstract_utilities/import_utils/imports/constants.py +2 -0
  110. abstract_utilities/import_utils/imports/imports.py +4 -0
  111. abstract_utilities/import_utils/imports/init_imports.py +3 -0
  112. abstract_utilities/import_utils/imports/module_imports.py +9 -0
  113. abstract_utilities/import_utils/imports/utils.py +30 -0
  114. abstract_utilities/import_utils/src/__init__.py +8 -0
  115. abstract_utilities/import_utils/src/clean_imports.py +278 -0
  116. abstract_utilities/import_utils/src/dot_utils.py +80 -0
  117. abstract_utilities/import_utils/src/extract_utils.py +46 -0
  118. abstract_utilities/import_utils/src/import_functions.py +110 -0
  119. abstract_utilities/import_utils/src/import_utils.py +349 -0
  120. abstract_utilities/import_utils/src/layze_import_utils/__init__.py +2 -0
  121. abstract_utilities/import_utils/src/layze_import_utils/lazy_utils.py +41 -0
  122. abstract_utilities/import_utils/src/layze_import_utils/nullProxy.py +37 -0
  123. abstract_utilities/import_utils/src/nullProxy.py +30 -0
  124. abstract_utilities/import_utils/src/package_utils/__init__.py +139 -0
  125. abstract_utilities/import_utils/src/package_utils/context_utils.py +27 -0
  126. abstract_utilities/import_utils/src/package_utils/import_collectors.py +53 -0
  127. abstract_utilities/import_utils/src/package_utils/path_utils.py +28 -0
  128. abstract_utilities/import_utils/src/package_utils/safe_import.py +27 -0
  129. abstract_utilities/import_utils/src/package_utils.py +140 -0
  130. abstract_utilities/import_utils/src/package_utilss/__init__.py +139 -0
  131. abstract_utilities/import_utils/src/package_utilss/context_utils.py +27 -0
  132. abstract_utilities/import_utils/src/package_utilss/import_collectors.py +53 -0
  133. abstract_utilities/import_utils/src/package_utilss/path_utils.py +28 -0
  134. abstract_utilities/import_utils/src/package_utilss/safe_import.py +27 -0
  135. abstract_utilities/import_utils/src/pkg_utils.py +194 -0
  136. abstract_utilities/import_utils/src/sysroot_utils.py +112 -0
  137. abstract_utilities/imports.py +21 -0
  138. abstract_utilities/json_utils/__init__.py +2 -0
  139. abstract_utilities/json_utils/imports/__init__.py +2 -0
  140. abstract_utilities/json_utils/imports/imports.py +2 -0
  141. abstract_utilities/json_utils/imports/module_imports.py +5 -0
  142. abstract_utilities/json_utils/json_utils.py +777 -0
  143. abstract_utilities/list_utils/__init__.py +2 -0
  144. abstract_utilities/list_utils/imports/__init__.py +2 -0
  145. abstract_utilities/list_utils/imports/imports.py +1 -0
  146. abstract_utilities/list_utils/imports/module_imports.py +0 -0
  147. abstract_utilities/list_utils/list_utils.py +202 -0
  148. abstract_utilities/log_utils/__init__.py +5 -0
  149. abstract_utilities/log_utils/abstractLogManager.py +64 -0
  150. abstract_utilities/log_utils/call_response.py +68 -0
  151. abstract_utilities/log_utils/imports/__init__.py +2 -0
  152. abstract_utilities/log_utils/imports/imports.py +7 -0
  153. abstract_utilities/log_utils/imports/module_imports.py +2 -0
  154. abstract_utilities/log_utils/log_file.py +162 -0
  155. abstract_utilities/log_utils/logger_callable.py +49 -0
  156. abstract_utilities/math_utils/__init__.py +2 -0
  157. abstract_utilities/math_utils/imports/__init__.py +2 -0
  158. abstract_utilities/math_utils/imports/imports.py +2 -0
  159. abstract_utilities/math_utils/imports/module_imports.py +1 -0
  160. abstract_utilities/math_utils/math_utils.py +208 -0
  161. abstract_utilities/parse_utils/__init__.py +2 -0
  162. abstract_utilities/parse_utils/imports/__init__.py +3 -0
  163. abstract_utilities/parse_utils/imports/constants.py +10 -0
  164. abstract_utilities/parse_utils/imports/imports.py +2 -0
  165. abstract_utilities/parse_utils/imports/module_imports.py +4 -0
  166. abstract_utilities/parse_utils/parse_utils.py +539 -0
  167. abstract_utilities/path_utils/__init__.py +2 -0
  168. abstract_utilities/path_utils/imports/__init__.py +3 -0
  169. abstract_utilities/path_utils/imports/imports.py +1 -0
  170. abstract_utilities/path_utils/imports/module_imports.py +8 -0
  171. abstract_utilities/path_utils/path_utils.py +248 -0
  172. abstract_utilities/path_utils.py +95 -14
  173. abstract_utilities/read_write_utils/__init__.py +1 -0
  174. abstract_utilities/read_write_utils/imports/__init__.py +2 -0
  175. abstract_utilities/read_write_utils/imports/imports.py +2 -0
  176. abstract_utilities/read_write_utils/imports/module_imports.py +5 -0
  177. abstract_utilities/read_write_utils/read_write_utils.py +439 -0
  178. abstract_utilities/read_write_utils.py +218 -10
  179. abstract_utilities/robust_reader/imports/imports.py +0 -9
  180. abstract_utilities/robust_readers/import_utils/__init__.py +1 -0
  181. abstract_utilities/robust_readers/import_utils/clean_imports.py +175 -0
  182. abstract_utilities/robust_readers/initFuncGen.py +10 -2
  183. abstract_utilities/safe_utils/__init__.py +2 -0
  184. abstract_utilities/safe_utils/imports/__init__.py +3 -0
  185. abstract_utilities/safe_utils/imports/imports.py +2 -0
  186. abstract_utilities/safe_utils/imports/module_imports.py +2 -0
  187. abstract_utilities/safe_utils/safe_utils.py +166 -0
  188. abstract_utilities/ssh_utils/__init__.py +3 -1
  189. abstract_utilities/ssh_utils/classes.py +0 -1
  190. abstract_utilities/ssh_utils/cmd_utils.py +207 -0
  191. abstract_utilities/ssh_utils/imports/__init__.py +3 -0
  192. abstract_utilities/ssh_utils/imports/imports.py +5 -0
  193. abstract_utilities/ssh_utils/imports/module_imports.py +6 -0
  194. abstract_utilities/ssh_utils/imports/utils.py +189 -0
  195. abstract_utilities/ssh_utils/imports.py +1 -2
  196. abstract_utilities/ssh_utils/pexpect_utils.py +11 -18
  197. abstract_utilities/ssh_utils/type_checks.py +92 -0
  198. abstract_utilities/string_clean.py +40 -1
  199. abstract_utilities/string_utils/__init__.py +4 -0
  200. abstract_utilities/string_utils/clean_utils.py +28 -0
  201. abstract_utilities/string_utils/eat_utils.py +103 -0
  202. abstract_utilities/string_utils/imports/__init__.py +3 -0
  203. abstract_utilities/string_utils/imports/imports.py +2 -0
  204. abstract_utilities/string_utils/imports/module_imports.py +2 -0
  205. abstract_utilities/string_utils/imports/utils.py +81 -0
  206. abstract_utilities/string_utils/replace_utils.py +27 -0
  207. abstract_utilities/string_utils.py +51 -0
  208. abstract_utilities/thread_utils/__init__.py +2 -0
  209. abstract_utilities/thread_utils/imports/__init__.py +2 -0
  210. abstract_utilities/thread_utils/imports/imports.py +2 -0
  211. abstract_utilities/thread_utils/imports/module_imports.py +2 -0
  212. abstract_utilities/thread_utils/thread_utils.py +140 -0
  213. abstract_utilities/time_utils/__init__.py +2 -0
  214. abstract_utilities/time_utils/imports/__init__.py +2 -0
  215. abstract_utilities/time_utils/imports/imports.py +3 -0
  216. abstract_utilities/time_utils/imports/module_imports.py +1 -0
  217. abstract_utilities/time_utils/time_utils.py +392 -0
  218. abstract_utilities/type_utils/__init__.py +7 -0
  219. abstract_utilities/type_utils/alpha_utils.py +59 -0
  220. abstract_utilities/type_utils/get_type.py +120 -0
  221. abstract_utilities/type_utils/imports/__init__.py +3 -0
  222. abstract_utilities/type_utils/imports/constants.py +134 -0
  223. abstract_utilities/type_utils/imports/imports.py +4 -0
  224. abstract_utilities/type_utils/imports/module_imports.py +25 -0
  225. abstract_utilities/type_utils/is_type.py +455 -0
  226. abstract_utilities/type_utils/make_type.py +126 -0
  227. abstract_utilities/type_utils/mime_types.py +68 -0
  228. abstract_utilities/type_utils/num_utils.py +19 -0
  229. abstract_utilities/type_utils/type_utils.py +104 -0
  230. abstract_utilities/type_utils.py +25 -1
  231. {abstract_utilities-0.2.2.442.dist-info → abstract_utilities-0.2.2.688.dist-info}/METADATA +1 -1
  232. abstract_utilities-0.2.2.688.dist-info/RECORD +288 -0
  233. imports/__init__.py +36 -0
  234. abstract_utilities-0.2.2.442.dist-info/RECORD +0 -82
  235. {abstract_utilities-0.2.2.442.dist-info → abstract_utilities-0.2.2.688.dist-info}/WHEEL +0 -0
  236. {abstract_utilities-0.2.2.442.dist-info → abstract_utilities-0.2.2.688.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,539 @@
1
+ from .imports import *
2
def detect_language_from_text(text: str):
    """Guess the language of a snippet by counting signature-regex hits.

    Every candidate language owns a list of regular expressions. A language
    scores one point for each of its expressions that occurs at least once in
    ``str(text)``.

    Args:
        text: The snippet to classify; it is coerced with ``str()`` first.

    Returns:
        The name of the single best-scoring language, ``'neither'`` when no
        expression matched at all, or ``'uncertain'`` when two or more
        languages share the top score.
    """
    signatures = {
        'javascript': [
            r'\bfunction\s+\w+\s*\(.*\)\s*{',
            r'\bvar\s+\w+\s*=',
            r'\bconst\s+\w+\s*=',
            r'\blet\s+\w+\s*=',
            r'\bconsole\.log\s*\(',
            r'\bexport\s+(default|function|const|class)',
            r'\bimport\s+.*\s+from\s+[\'"]'
        ],
        'typescript': [
            r'\binterface\s+\w+\s*{',
            r'\btype\s+\w+\s*=',
            r'\blet\s+\w+:\s+\w+',
            r'\bfunction\s+\w+\s*\(.*:\s*\w+\)',
            r'\bimport\s+.*\s+from\s+[\'"]',
            r'\bexport\s+(default|function|const|class)'
        ],
        'python': [
            r'\bdef\s+\w+\(',
            r'\bclass\s+\w+\s*:',
            r'\bimport\s+\w+',
            r'\bfrom\s+\w+\s+import\s+\w+',
            r'\bif\s+__name__\s*==\s*[\'"]__main__[\'"]',
            r'@\w+',
            r'\blambda\s+'
        ],
        'html': [
            r'<!DOCTYPE\s+html>',
            r'<html[^>]*>',
            r'<head>',
            r'<body>',
            r'<div[^>]*>',
            r'<script[^>]*>',
            r'</\w+>'
        ],
        'php': [
            r'<\?php',
            r'\$\w+\s*=',
            r'echo\s+["\']',
            r'->\w+\(',
            r'function\s+\w+\s*\(',
            r'\bclass\s+\w+\s*{'
        ],
        'bash': [
            r'#!/bin/bash',
            r'\becho\s+["\']',
            r'\bif\s+\[\[?',
            r'\bthen\b',
            r'\bfi\b',
            r'\bfor\s+\w+\s+in\b',
            r'\bdo\b',
            r'\bdone\b'
        ]
    }
    source = str(text)

    # One point per expression that is found anywhere in the snippet.
    hits = {}
    for language, regexes in signatures.items():
        matched = 0
        for regex in regexes:
            if re.search(regex, source):
                matched += 1
        hits[language] = matched

    best = max(hits.values(), default=0)
    if not best:
        return 'neither'

    leaders = [language for language, matched in hits.items() if matched == best]
    if len(leaders) != 1:
        # A tie gives no basis for preferring one language over another.
        return 'uncertain'
    return leaders[0]
67
def get_tripple_string(string):
    """Return ``string`` repeated three times back to back.

    Used to turn a quote character into its triple-quote form.
    """
    return ''.join((string, string, string))
72
def get_within_quotes(text, quotes=None):
    """Collect the substrings of ``text`` that sit between quote delimiters.

    Args:
        text: The string to scan.
        quotes: Delimiters to look for. Each entry is either a single quote
            string (the same token opens and closes) or a two-item
            list/tuple ``[opener, closer]``. Defaults to ``["'", '"']``.

    Returns:
        A flat list of the extracted substrings, grouped by delimiter in the
        order the delimiters were given.

    Notes:
        * For a single quote string, the text is first split on the tripled
          form of the quote (when present) and every resulting part is
          scanned on its own.
        * For an ``[opener, closer]`` pair, the segment that precedes the
          first opener is also reported (up to its first closer). This
          mirrors the existing behaviour.
          NOTE(review): confirm whether callers rely on that leading segment.
    """
    delimiters = quotes or ["'", '"']
    in_quotes = []
    for delimiter in delimiters:
        if isinstance(delimiter, (list, tuple)):
            opener, closer = delimiter[0], delimiter[1]
            # Use a dedicated loop name: rebinding ``text`` here would make
            # every later delimiter scan a fragment instead of the full input.
            for segment in text.split(opener):
                in_quotes.append(segment.split(closer)[0])
            continue

        triple = delimiter * 3
        parts = text.split(triple) if triple in text else [text]
        for part in parts:
            pieces = part.split(delimiter)
            # Number of characters occupied by the delimiter in this part.
            quote_count = len(part) - len(part.replace(delimiter, ''))
            # Odd-indexed pieces lie between an opening and a closing quote.
            last = min(quote_count, len(pieces))
            in_quotes.extend(pieces[i] for i in range(1, last, 2))
    return in_quotes
90
+
91
def search_code(code_languages, parts):
    """Keep the entries of ``parts`` whose detected language is wanted.

    Each element of ``parts`` is normalised with ``make_list`` and every
    resulting item is classified with ``detect_language_from_text``; items
    whose label is contained in ``code_languages`` are returned in order.
    """
    matches = []
    for group in parts:
        for candidate in make_list(group):
            if detect_language_from_text(candidate) in code_languages:
                matches.append(candidate)
    return matches
94
def get_token_encoder(model_name: str = "gpt-4", encoding_name: str = None):
    """
    Retrieves the encoder for a given model or encoding name.

    Args:
        model_name (str): The name of the model. Defaults to "gpt-4".
        encoding_name (str, optional): The encoding name to use. When given
            (and non-empty) it takes precedence over ``model_name``.

    Returns:
        Encoder: A tiktoken encoder object.
    """
    # The docstring must be the first statement to be picked up as __doc__.
    # Deferred import: tiktoken is only loaded when an encoder is requested.
    import tiktoken

    if encoding_name:
        return tiktoken.get_encoding(encoding_name)
    return tiktoken.encoding_for_model(model_name)
110
+
111
def num_tokens_from_string(string: str, model_name: str = "gpt-4", encoding_name: str = None) -> int:
    """
    Count how many tokens ``string`` encodes to.

    Args:
        string (str): The input text; it is coerced with ``str()`` first.
        model_name (str, optional): Model used to pick the encoding when
            ``encoding_name`` is not given. Defaults to "gpt-4".
        encoding_name (str, optional): Explicit encoding name.

    Returns:
        int: The number of tokens produced by the selected encoder.
    """
    token_ids = get_token_encoder(model_name, encoding_name).encode(str(string))
    return len(token_ids)
126
+
127
def infer_tab_size(file_path):
    """Derive an indentation width from the first tab-bearing line of a file.

    If ``file_path`` is not an existing file, it is first created via
    ``write_to_file`` with a single tab character as content. The file is
    then scanned for the first line containing a tab anywhere in it.

    Returns:
        The number of leading whitespace characters on that line (as removed
        by ``str.lstrip()``), or ``4`` when no line contains a tab.
    """
    if not os.path.isfile(file_path):
        write_to_file(file_path=file_path, contents='\t')

    with open(file_path, 'r') as handle:
        tabbed_line = next((row for row in handle if '\t' in row), None)

    if tabbed_line is None:
        return 4  # Default if no tab found
    return len(tabbed_line) - len(tabbed_line.lstrip())
135
+
136
def get_blocks(data, delim='\n'):
    """Split ``data`` into blocks and report the delimiter that was used.

    * A list is returned untouched, paired with ``None``.
    * A tuple is read as ``(text, ..., delimiter)``: its first item is the
      text and its last item overrides ``delim``.
    * Anything else is split on ``delim`` via its ``split`` method.

    Returns:
        A ``(blocks, delimiter)`` pair.
    """
    if isinstance(data, list):
        return data, None

    text, separator = data, delim
    if isinstance(data, tuple):
        text = data[0]
        separator = data[-1]
    return text.split(separator), separator
142
+
143
def get_indent_levels(text, tab_size=None):
    """List the distinct indentation widths found in ``text``.

    Args:
        text: Multi-line string to inspect; it is split on ``'\\n'``.
        tab_size: Width to count for each leading tab. When ``None`` (the
            default) the width comes from ``infer_tab_size('config.txt')``,
            which reads that file from the working directory and creates it
            if it is missing. Passing a value avoids touching the filesystem.

    Returns:
        A list of indentation widths in order of first appearance. It always
        starts with ``0``.
    """
    if tab_size is None:
        tab_size = infer_tab_size('config.txt')

    indent_list = [0]
    for line in text.split('\n'):
        indent = 0
        for char in line:
            if char == '\t':
                indent += tab_size
            elif char == ' ':
                indent += 1
            else:
                # First non-indent character ends the leading run.
                break
        if indent not in indent_list:
            indent_list.append(indent)
    return indent_list
155
+
156
def get_code_blocks(data, indent_level=0):
    """Group lines into blocks that each start at a given indentation.

    ``data`` is split with ``get_blocks(data, '\\n')``. A new block starts at
    every line whose leading run of spaces, tabs and newlines is exactly
    ``indent_level`` characters long; all other lines are appended to the
    current block.

    Args:
        data: Text, ``(text, delimiter)`` tuple, or a list of lines.
        indent_level: Number of leading whitespace characters that marks the
            first line of a block.

    Returns:
        ``(blocks, delimiter)`` where ``blocks`` is a list of joined strings.
        The first entry holds whatever preceded the first block start and is
        ``''`` when nothing did. ``delimiter`` is the string used to join the
        lines of each block.
    """
    lines, delim = get_blocks(data, '\n')
    # get_blocks reports no delimiter for list input; fall back to a newline
    # so the join below cannot fail on None.
    joiner = '\n' if delim is None else delim

    groups = [[]]
    for line in lines:
        leading = len(line) - len(line.lstrip(' \n\t'))
        if leading == indent_level:
            groups.append([line])
        else:
            groups[-1].append(line)

    return [joiner.join(group) for group in groups], joiner
173
+
174
def chunk_any_to_tokens(data, max_tokens, model_name="gpt-4", encoding_name=None, delimiter='\n\n', reverse=False):
    """Greedily pack blocks into chunks that stay within a token budget.

    Args:
        data: A list of blocks, or anything ``get_blocks`` can split using
            ``delimiter``.
        max_tokens: Upper bound on the token count of a joined chunk.
        model_name: Model name forwarded to ``num_tokens_from_string``.
        encoding_name: Optional encoding name forwarded likewise.
        delimiter: String used both to split ``data`` and to re-join blocks
            inside a chunk. ``None`` is treated as the empty string.
        reverse: Walk the blocks from last to first.

    Returns:
        A list of chunk strings. A single block that alone exceeds
        ``max_tokens`` is still emitted as its own chunk.
    """
    if isinstance(data, list):
        blocks = data
    else:
        blocks, delimiter = get_blocks(data, delimiter)

    # Callers may pass None when the blocks have no natural separator;
    # str.join needs a real string.
    if delimiter is None:
        delimiter = ''

    if reverse:
        blocks = reversed(blocks)

    chunks = []
    current_chunk = []

    for block in blocks:
        candidate = delimiter.join(current_chunk + [block])
        if num_tokens_from_string(candidate, model_name, encoding_name) <= max_tokens:
            current_chunk.append(block)
        else:
            if current_chunk:
                chunks.append(delimiter.join(current_chunk))
            current_chunk = [block]

    if current_chunk:
        # Join the tail with the same delimiter as every other chunk.
        chunks.append(delimiter.join(current_chunk))

    return chunks
198
+
199
def chunk_data_by_type(data, max_tokens, chunk_type=None, model_name="gpt-4", encoding_name=None, reverse=False):
    """Dispatch to a chunking strategy selected by ``chunk_type``.

    * ``"CODE"`` -> ``chunk_source_code``
    * ``"TEXT"`` -> ``chunk_text_by_tokens``
    * ``"URL"``  -> split around ``<h1>``..``<h6>`` elements, no delimiter
    * ``"SOUP"`` -> ``data`` is used as the block sequence, no delimiter
    * ``"DOCUMENT"`` -> split on ``"."``
    * anything else -> split on blank lines (``"\\n\\n"``)

    The last four hand their blocks to ``chunk_any_to_tokens``.
    """
    # The two dedicated strategies return directly.
    if chunk_type == "CODE":
        return chunk_source_code(data, max_tokens, model_name, encoding_name, reverse=reverse)
    if chunk_type == "TEXT":
        return chunk_text_by_tokens(data, max_tokens, model_name, encoding_name, reverse=reverse)

    if chunk_type == "URL":
        delimiter, blocks = None, re.split(r'<h[1-6].*?>.*?</h[1-6]>', data)
    elif chunk_type == "SOUP":
        delimiter, blocks = None, data
    else:
        delimiter = "." if chunk_type == "DOCUMENT" else "\n\n"
        blocks = data.split(delimiter)

    return chunk_any_to_tokens(blocks, max_tokens, model_name, encoding_name, delimiter, reverse=reverse)
219
def chunk_by_language_context(
    text: str,
    max_tokens: int = 1000,
    model_name: str = "gpt-4",
    encoding_name: str = None,
    overlap: int = 0,
    verbose: bool = False,
    reverse: bool = False
    ) -> List[str]:
    """
    Detects language and applies chunking strategy best suited to that context.

    Args:
        text (str): Source text to chunk.
        max_tokens (int): Token budget per chunk.
        model_name (str): Model name used for tokenization.
        encoding_name (str): Optional encoding override.
        overlap (int): Forwarded to ``strict_token_chunking``.
        verbose (bool): Print the detected language and forward the flag to
            the strategies that accept it.
        reverse (bool): Forwarded to ``chunk_source_code``.

    Returns:
        List[str]: Chunks produced by the selected strategy:
        ``chunk_source_code`` for Python, ``chunk_by_braces`` for
        JavaScript/TypeScript, ``chunk_html_by_tag_blocks`` for HTML/PHP and
        ``strict_token_chunking`` for everything else (including the
        ``'neither'`` and ``'uncertain'`` detector results).
    """
    task_params = get_class_inputs(ChunkParams,
                                   max_tokens=max_tokens,
                                   model_name=model_name,
                                   encoding_name=encoding_name,
                                   overlap=overlap,
                                   verbose=verbose,
                                   reverse=reverse
                                   )
    language = detect_language_from_text(text)

    if verbose:
        print(f"Detected language: {language}")

    if language == 'python':
        return chunk_source_code(text, task_params.max_tokens, task_params.model_name, task_params.encoding_name, task_params.reverse)

    # The detector reports 'javascript'; 'js' is kept as a harmless alias.
    if language in ('javascript', 'js', 'typescript'):
        # chunk_by_braces has no ``verbose`` parameter, so it is not forwarded.
        return chunk_by_braces(text, task_params.max_tokens, task_params.model_name, task_params.encoding_name, open='{', close='}')

    if language in ('html', 'php'):
        return chunk_html_by_tag_blocks(text, task_params.max_tokens, task_params.model_name, task_params.encoding_name, verbose=task_params.verbose)

    return strict_token_chunking(text, task_params.max_tokens, task_params.model_name, task_params.encoding_name, task_params.overlap, task_params.verbose)
256
def chunk_html_by_tag_blocks(
    html: str,
    max_tokens: int = 1000,
    model_name: str = "gpt-4",
    encoding_name: str = None,
    tags: List[str] = None,
    verbose: bool = False
    ) -> List[str]:
    """
    Chunks HTML using BeautifulSoup, grouping by selected tag blocks (e.g., div, section).
    Consecutive elements are packed together while their summed token counts
    stay within ``max_tokens``; an element that is larger than the budget on
    its own is emitted unchanged as a separate chunk.

    Args:
        html (str): HTML input.
        max_tokens (int): Max tokens per chunk.
        model_name (str): Token model name.
        encoding_name (str): Optional encoding override.
        tags (List[str]): Tags to collect. Defaults to
            ``["section", "article", "div", "form", "main"]``.
        verbose (bool): Print debug info.

    Returns:
        List[str]: List of HTML chunks, in document order of the elements.
    """
    task_params = get_class_inputs(ChunkParams,
                                   max_tokens=max_tokens,
                                   model_name=model_name,
                                   encoding_name=encoding_name,
                                   verbose=verbose,
                                   tags=tags

                                   )
    tags = tags or ["section", "article", "div", "form", "main"]
    soup = BeautifulSoup(html, "html.parser")

    encoding = (
        tiktoken.get_encoding(task_params.encoding_name)
        if encoding_name else tiktoken.encoding_for_model(task_params.model_name)
    )

    def count_tokens(text):
        return len(encoding.encode(text))

    chunks = []
    pending = []          # element strings waiting to be emitted together
    pending_tokens = 0

    def flush():
        """Emit the pending elements as one newline-joined chunk."""
        nonlocal pending, pending_tokens
        if pending:
            chunks.append("\n".join(pending))
        pending = []
        pending_tokens = 0

    # NOTE(review): find_all(..., recursive=True) also returns matching tags
    # nested inside other matches, so nested content may appear in more than
    # one chunk. Confirm this is intended.
    for element in soup.find_all(tags, recursive=True):
        element_html = str(element)
        element_tokens = count_tokens(element_html)

        if element_tokens > task_params.max_tokens:
            # Too large to combine: emit what came before first so the
            # output keeps document order, then the element itself.
            flush()
            chunks.append(element_html)
            continue

        if pending_tokens + element_tokens > task_params.max_tokens:
            flush()
        pending.append(element_html)
        pending_tokens += element_tokens

    flush()

    if task_params.verbose:
        print(f"Chunked into {len(chunks)} HTML segments")

    return chunks
326
def chunk_text_by_tokens(prompt_data,
                         max_tokens,
                         model_name="gpt-4",
                         encoding_name=None,
                         reverse=False):
    """Pack the lines of ``prompt_data`` into chunks within a token budget.

    The text is split on ``"\\n"``. Lines are accumulated, joined by
    newlines, for as long as the running sum of their individual token counts
    does not exceed ``max_tokens``.

    Args:
        prompt_data: Text to chunk.
        max_tokens: Token budget per chunk.
        model_name: Model name used for tokenization.
        encoding_name: Optional encoding override.
        reverse: Process the lines from last to first.

    Returns:
        A list of chunk strings. A line that alone exceeds the budget becomes
        its own chunk.
    """
    task_params = get_class_inputs(ChunkParams,
                                   max_tokens=max_tokens,
                                   model_name=model_name,
                                   encoding_name=encoding_name,
                                   reverse=reverse

                                   )
    sentences = prompt_data.split("\n")
    if task_params.reverse:
        sentences = reversed(sentences)

    chunks = []
    current_chunk = ""
    current_chunk_tokens = 0

    for sentence in sentences:
        sentence_tokens = num_tokens_from_string(sentence, task_params.model_name, task_params.encoding_name)

        if current_chunk_tokens + sentence_tokens <= task_params.max_tokens:
            current_chunk += "\n" + sentence if current_chunk else sentence
            current_chunk_tokens += sentence_tokens
        else:
            # Nothing is pending when the very first line is already over
            # budget; do not emit an empty chunk in that case.
            if current_chunk:
                chunks.append(current_chunk)
            current_chunk = sentence
            current_chunk_tokens = sentence_tokens

    if current_chunk:
        chunks.append(current_chunk)

    return chunks
361
def chunk_by_braces(text,
                    max_tokens,
                    model_name,
                    encoding_name,
                    open='{',
                    close='}',
                    verbose=False):
    """
    Chunks code using balanced brace logic (useful for JS, PHP, etc.).

    Characters are accumulated into a buffer. The buffer is cut into a chunk
    once no brace is open and its token count has reached ``max_tokens``.

    Args:
        text: Source text to chunk.
        max_tokens: Token count at which a balanced buffer is cut.
        model_name: Model name used for tokenization.
        encoding_name: Optional encoding override.
        open: Opening delimiter character.
        close: Closing delimiter character.
        verbose: Print the number of chunks produced.

    Returns:
        A list of chunk strings; a whitespace-only remainder is dropped.
        Chunks can exceed ``max_tokens`` because a cut is only made at
        nesting depth zero.
    """
    task_params = get_class_inputs(ChunkParams,
                                   max_tokens=max_tokens,
                                   model_name=model_name,
                                   encoding_name=encoding_name
                                   )
    encoding = (
        tiktoken.get_encoding(task_params.encoding_name)
        if encoding_name else tiktoken.encoding_for_model(task_params.model_name)
    )

    # The text is passed through an encode/decode round trip before scanning.
    # NOTE(review): presumably a normalisation step - confirm it is needed.
    decoded = encoding.decode(encoding.encode(text))

    stack = []
    chunks = []
    buffer = ''
    for char in decoded:
        buffer += char
        if char == open:
            stack.append(char)
        elif char == close and stack:
            stack.pop()

        # Only cut at nesting depth zero so braces stay balanced in a chunk.
        if not stack and len(encoding.encode(buffer)) >= task_params.max_tokens:
            chunks.append(buffer)
            buffer = ''

    if buffer.strip():
        chunks.append(buffer)

    if verbose:
        print(f"Chunked into {len(chunks)} brace-balanced segments")

    return chunks
401
+
402
def extract_python_blocks(source_code: str, reverse: bool = False) -> List[str]:
    """
    Splits Python source into blocks that each begin at a ``def``/``class`` line.

    A new block starts at every line that matches a single-line function or
    class header, at any indentation. Decorator lines directly above such a
    header travel with it. Text before the first header forms its own leading
    block.

    Args:
        source_code (str): Python code to analyze.
        reverse (bool): Return the blocks in reverse order (last block
            first). The lines inside each block keep their original order.

    Returns:
        List[str]: List of code blocks; empty for empty input.
    """
    func_pattern = re.compile(r'^\s*def\s+\w+\s*\(.*\)\s*:', re.MULTILINE)
    class_pattern = re.compile(r'^\s*class\s+\w+\s*(\(.*\))?\s*:', re.MULTILINE)
    decorator_pattern = re.compile(r'^\s*@\w+', re.MULTILINE)

    blocks = []
    current_block = []

    for line in source_code.splitlines():
        if func_pattern.match(line) or class_pattern.match(line):
            # Peel trailing decorator lines off the previous block so they
            # stay attached to the definition they decorate.
            split_at = len(current_block)
            while split_at > 0 and decorator_pattern.match(current_block[split_at - 1]):
                split_at -= 1
            preceding, decorators = current_block[:split_at], current_block[split_at:]
            if preceding:
                blocks.append("\n".join(preceding))
            current_block = decorators
        current_block.append(line)

    if current_block:
        blocks.append("\n".join(current_block))

    # Blocks are always built in forward order, which keeps each body with
    # its own header. Only the block order is reversed on request.
    if reverse:
        blocks.reverse()
    return blocks
445
def chunk_source_code(source_code: str,
                      max_tokens: int,
                      model_name="gpt-4",
                      encoding_name=None,
                      reverse=False):
    """Chunk Python source along the blocks from ``extract_python_blocks``.

    Blocks are packed together, separated by a blank line, while the sum of
    their token counts stays within the budget. A block that is larger than
    the budget on its own is emitted unchanged as a separate chunk.

    Args:
        source_code: Python source text.
        max_tokens: Token budget per chunk.
        model_name: Model name used for tokenization.
        encoding_name: Optional encoding override.
        reverse: Forwarded to ``extract_python_blocks``.

    Returns:
        A list of chunk strings, in the order the blocks were produced.
    """
    task_params = get_class_inputs(ChunkParams,
                                   max_tokens=max_tokens,
                                   model_name=model_name,
                                   encoding_name=encoding_name,
                                   reverse=reverse
                                   )
    encoding = (
        tiktoken.get_encoding(task_params.encoding_name)
        if task_params.encoding_name else tiktoken.encoding_for_model(task_params.model_name)
    )

    def token_count(text):
        return len(encoding.encode(text))

    chunks = []
    current_chunk = []
    current_tokens = 0

    def flush():
        """Emit the pending blocks as one chunk and reset the accumulator."""
        nonlocal current_chunk, current_tokens
        if current_chunk:
            chunks.append("\n\n".join(current_chunk))
        current_chunk = []
        current_tokens = 0

    for block in extract_python_blocks(source_code, task_params.reverse):
        block_tokens = token_count(block)
        if block_tokens > task_params.max_tokens:
            # Too big to combine: emit pending blocks first so the output
            # keeps block order, then the oversized block as is.
            flush()
            chunks.append(block)
            continue
        if current_tokens + block_tokens > task_params.max_tokens:
            flush()
        current_chunk.append(block)
        current_tokens += block_tokens

    flush()

    return chunks
485
+
486
+
487
def strict_token_chunking(data: str,
                          max_tokens: int,
                          model_name: str = "gpt-4",
                          encoding_name: str = None,
                          overlap:int=0,
                          verbose:bool=False) -> List[str]:
    """
    Paragraph-based chunking under a token budget.

    The stripped input is split wherever a blank (whitespace-only) line
    separates two paragraphs. Paragraphs are accumulated, joined by a blank
    line, for as long as the joined text stays within ``max_tokens``. A
    paragraph that alone exceeds the budget is emitted as its own chunk.

    Args:
        data (str): The full input text.
        max_tokens (int): Maximum number of tokens per chunk.
        model_name (str): Model name for tokenization.
        encoding_name (str): Optional encoding override.
        overlap (int): Passed to the parameter object; not otherwise used by
            this function.
        verbose (bool): Passed to the parameter object; not otherwise used by
            this function.

    Returns:
        List[str]: List of text chunks.
    """
    task_params = get_class_inputs(ChunkParams,
                                   max_tokens=max_tokens,
                                   model_name=model_name,
                                   encoding_name=encoding_name,
                                   overlap=overlap,
                                   verbose=verbose
                                   )
    if task_params.encoding_name:
        encoding = tiktoken.get_encoding(task_params.encoding_name)
    else:
        encoding = tiktoken.encoding_for_model(task_params.model_name)

    chunks = []
    pending = []

    # Split on paragraph gaps: a newline, optional whitespace, a newline.
    for paragraph in re.split(r"\n\s*\n", data.strip()):
        candidate = "\n\n".join([*pending, paragraph])
        if len(encoding.encode(candidate)) <= task_params.max_tokens:
            pending.append(paragraph)
            continue
        if pending:
            chunks.append("\n\n".join(pending))
        pending = [paragraph]

    if pending:
        chunks.append("\n\n".join(pending))

    return chunks
@@ -0,0 +1,2 @@
1
+ from .imports import *
2
+ from .path_utils import *
@@ -0,0 +1,3 @@
1
+ from .imports import *
2
+ from .module_imports import *
3
+
@@ -0,0 +1 @@
1
+ from ...imports import os,shlex
@@ -0,0 +1,8 @@
1
+ from ...string_utils import eatAll
2
+ from ...list_utils import make_list
3
+ from ...type_utils import get_media_exts, is_media_type,MIME_TYPES
4
+ from ...safe_utils import safe_join,get_slash
5
+ from ...class_utils import get_caller_path,get_caller_dir,get_initial_caller,get_initial_caller_dir
6
+ from ...file_utils import is_file,is_dir,is_exists
7
+ from ...ssh_utils import is_file,is_dir,is_exists
8
+ from ...directory_utils import *