collating-machine 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,418 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Python
3
+
4
+ """Copyright (c) Alexander Fedotov.
5
+ This source code is licensed under the license found in the
6
+ LICENSE file in the root directory of this source tree.
7
+ """
8
+ import re
9
+ import html
10
+
11
+
12
+ def plato_text_to_cmj(plato_text, machine_name):
13
+ """
14
+ Transforms platoText format to CMJ format.
15
+ """
16
+ if plato_text is None or not isinstance(plato_text, str):
17
+ raise ValueError("Invalid input: plato_text must be a string.")
18
+
19
+ trimmed_plato_text = plato_text.strip()
20
+ if not trimmed_plato_text:
21
+ return []
22
+
23
+ assistant_name_upper = machine_name.upper()
24
+
25
+ cmj_messages = []
26
+
27
+ message_blocks = re.split(r'\n\n(?=[A-Za-z0-9_-]+:\s*)', trimmed_plato_text)
28
+
29
+ for block in message_blocks:
30
+ current_block = block.strip()
31
+ if not current_block:
32
+ continue
33
+
34
+ speaker_match = re.match(r'^([A-Za-z0-9_-]+):\s*', current_block)
35
+ if not speaker_match:
36
+ continue
37
+
38
+ speaker = speaker_match.group(1)
39
+ raw_utterance = current_block[len(speaker_match.group(0)):]
40
+
41
+ is_thoughts = False
42
+ if raw_utterance.strip().startswith('(thinking)'):
43
+ is_thoughts = True
44
+ raw_utterance = re.sub(r'^\s*\(thinking\)\s*', '', raw_utterance)
45
+
46
+ utterance = re.sub(r'\n{2,}', '\n\t', raw_utterance).strip()
47
+
48
+ final_utterance = utterance
49
+ if is_thoughts:
50
+ final_utterance = f"(thinking) {utterance}"
51
+
52
+ role = 'user'
53
+ if speaker.upper() == assistant_name_upper:
54
+ role = 'assistant'
55
+ elif speaker.upper() == 'INSTRUCTIONS':
56
+ role = 'system'
57
+
58
+ cmj_messages.append({
59
+ 'role': role,
60
+ 'name': speaker,
61
+ 'content': final_utterance
62
+ })
63
+
64
+ return cmj_messages
65
+
66
+
67
+ def cmj_to_plato_text(cmj_messages):
68
+ """
69
+ Transforms a list of CMJ message dicts to platoText format.
70
+ """
71
+ if not isinstance(cmj_messages, list):
72
+ return ""
73
+
74
+ plato_text = ""
75
+
76
+ for message in cmj_messages:
77
+ if isinstance(message, dict) and isinstance(message.get('name'), str) and isinstance(message.get('content'),
78
+ str):
79
+ speaker = message['name'].strip()
80
+
81
+ utterance = re.sub(r'\n{2,}', '\n\t', message['content'])
82
+ utterance = utterance.strip()
83
+
84
+ plato_text += f"{speaker}: {utterance}\n\n"
85
+
86
+ return plato_text
87
+
88
+
89
+ def plato_text_to_muj(plato_text, machine_name):
90
+ """
91
+ Transforms platoText format to MUJ (Multi-User JSON) array for OpenAI
92
+ responses API.
93
+ Consecutive non-assistant messages are grouped into a single 'user' message.
94
+ Assistant messages have only the utterance.
95
+ """
96
+ if plato_text is None or not isinstance(plato_text, str):
97
+ raise ValueError("Invalid input: plato_text must be a string.")
98
+
99
+ trimmed_plato_text = plato_text.strip()
100
+ if not trimmed_plato_text:
101
+ return []
102
+
103
+ assistant_name_upper = machine_name.upper()
104
+
105
+ muj_messages = []
106
+ current_role = None
107
+ current_parts = []
108
+ current_is_thoughts = None
109
+
110
+ message_blocks = re.split(r'\n\n(?=[A-Za-z0-9_-]+:\s*)', trimmed_plato_text)
111
+
112
+ for block in message_blocks:
113
+ current_block = block.strip()
114
+ if not current_block:
115
+ continue
116
+
117
+ speaker_match = re.match(r'^([A-Za-z0-9_-]+):\s*', current_block)
118
+ if not speaker_match:
119
+ continue
120
+
121
+ speaker = speaker_match.group(1)
122
+ raw_utterance = current_block[len(speaker_match.group(0)):]
123
+
124
+ is_thoughts = False
125
+ if raw_utterance.strip().startswith('(thinking)'):
126
+ is_thoughts = True
127
+ raw_utterance = re.sub(r'^\s*\(thinking\)\s*', '', raw_utterance)
128
+
129
+ utterance = re.sub(r'\n{2,}', '\n\t', raw_utterance).strip()
130
+
131
+ final_utterance = utterance
132
+ if is_thoughts:
133
+ final_utterance = f"(thinking) {utterance}"
134
+
135
+ is_assistant_message = speaker.upper() == assistant_name_upper
136
+ role = 'assistant' if is_assistant_message else 'user'
137
+
138
+ if role != current_role or (role == 'assistant' and is_thoughts != current_is_thoughts):
139
+ if len(current_parts) > 0:
140
+ muj_messages.append({
141
+ 'role': current_role,
142
+ 'content': '\n\n'.join(current_parts)
143
+ })
144
+ current_role = role
145
+ current_is_thoughts = is_thoughts
146
+ current_parts = []
147
+
148
+ if is_assistant_message:
149
+ current_parts.append(final_utterance)
150
+ else:
151
+ current_parts.append(f"{speaker}: {final_utterance}")
152
+
153
+ if len(current_parts) > 0:
154
+ muj_messages.append({
155
+ 'role': current_role,
156
+ 'content': '\n\n'.join(current_parts)
157
+ })
158
+
159
+ return muj_messages
160
+
161
+
162
+ def another_plato_text_to_muj(plato_text, machine_name):
163
+ """
164
+ Transforms platoText format to MUJ (Multi-User JSON) array for OpenAI API.
165
+ Consecutive non-assistant messages are grouped into a single 'user' message.
166
+ Assistant messages have a name in each of them and are joined into a single
167
+ utterance.
168
+
169
+ This is for the Theatron type of imitation of performance.
170
+ """
171
+ # trimmed_plato_text = plato_text.strip()
172
+ # if not trimmed_plato_text:
173
+ # return []
174
+
175
+ assistant_name_upper = machine_name.upper()
176
+
177
+ muj_messages = []
178
+ current_role = None
179
+ current_parts = []
180
+
181
+ message_blocks = re.split(r'\n\n(?=[A-Za-z0-9_-]+:\s*)', plato_text)
182
+
183
+ for block in message_blocks:
184
+ current_block = block.strip()
185
+ if not current_block:
186
+ continue
187
+
188
+ speaker_match = re.match(r'^([A-Za-z0-9_-]+):\s*', current_block)
189
+ if not speaker_match:
190
+ continue
191
+
192
+ speaker = speaker_match.group(1)
193
+ raw_utterance = current_block[len(speaker_match.group(0)):]
194
+
195
+ is_thoughts = False
196
+ if raw_utterance.strip().startswith('(thinking)'):
197
+ is_thoughts = True
198
+ raw_utterance = re.sub(r'^\s*\(thinking\)\s*', '', raw_utterance)
199
+
200
+ # questionable replacement of double newline with new paragraph delimiter
201
+ utterance = re.sub(r'\n{2,}', '\n\t', raw_utterance).strip()
202
+
203
+ final_utterance = utterance
204
+ if is_thoughts:
205
+ final_utterance = f"(thinking) {utterance}"
206
+
207
+ is_assistant_message = speaker.upper() == assistant_name_upper
208
+ role = 'assistant' if is_assistant_message else 'user'
209
+
210
+ if role != current_role:
211
+ if len(current_parts) > 0:
212
+ muj_messages.append({
213
+ 'role': current_role,
214
+ 'content': '\n\n'.join(current_parts)
215
+ })
216
+ current_role = role
217
+ current_parts = []
218
+
219
+ current_parts.append(f"{speaker}: {final_utterance}")
220
+
221
+ if len(current_parts) > 0:
222
+ muj_messages.append({
223
+ 'role': current_role,
224
+ 'content': '\n\n'.join(current_parts)
225
+ })
226
+
227
+ return muj_messages
228
+
229
+
230
+ def plato_text_to_mpuj(plato_text, machine_name):
231
+ """
232
+ Transforms platoText format to MPUJ (Multi-Part User JSON) array for Gemini API.
233
+ Consecutive non-model messages are grouped into a single 'user' message
234
+ with multiple parts. Each part includes the speaker's name and utterance.
235
+ Model messages have a single part with the utterance.
236
+ """
237
+ if plato_text is None or not isinstance(plato_text, str):
238
+ raise ValueError("Invalid input: plato_text must be a string.")
239
+
240
+ trimmed_plato_text = plato_text.strip()
241
+ if not trimmed_plato_text:
242
+ return []
243
+
244
+ model_name_upper = machine_name.upper()
245
+
246
+ mpuj_messages = []
247
+ current_role = None
248
+ current_parts = []
249
+
250
+ message_blocks = re.split(r'\n\n(?=[A-Za-z0-9_-]+:\s*)', trimmed_plato_text)
251
+
252
+ for block in message_blocks:
253
+ current_block = block.strip()
254
+ if not current_block:
255
+ continue
256
+
257
+ speaker_match = re.match(r'^([A-Za-z0-9_-]+):\s*', current_block)
258
+ if not speaker_match:
259
+ continue
260
+
261
+ speaker = speaker_match.group(1)
262
+ raw_utterance = current_block[len(speaker_match.group(0)):]
263
+
264
+ is_thoughts = False
265
+ if raw_utterance.strip().startswith('(thinking)'):
266
+ is_thoughts = True
267
+ raw_utterance = re.sub(r'^\s*\(thinking\)\s*', '', raw_utterance)
268
+
269
+ utterance = re.sub(r'\n{2,}', '\n\t', raw_utterance).strip()
270
+
271
+ final_utterance = utterance
272
+ if is_thoughts:
273
+ final_utterance = f"(thinking) {utterance}"
274
+
275
+ is_model_message = speaker.upper() == model_name_upper
276
+ role = 'model' if is_model_message else 'user'
277
+
278
+ if role != current_role:
279
+ if len(current_parts) > 0:
280
+ mpuj_messages.append({
281
+ 'role': current_role,
282
+ 'parts': current_parts
283
+ })
284
+ current_role = role
285
+ current_parts = []
286
+
287
+ current_parts.append({'text': f"{speaker}: {final_utterance}"})
288
+
289
+ if len(current_parts) > 0:
290
+ mpuj_messages.append({
291
+ 'role': current_role,
292
+ 'parts': current_parts
293
+ })
294
+
295
+ return mpuj_messages
296
+
297
+
298
+ def new_plato_text(thoughts, text, machine_name):
299
+ """
300
+ Transforms a pair of text variables 'thoughts' and 'text' received
301
+ from the LLM and cleaned up from the markdown crap into a plato_text
302
+ format as new utterances of this machine, with its name as a speaker.
303
+ Does not form a 'thoughts' utterance if there were no 'thoughts'.
304
+
305
+ The result is later added to the input plato_text that came to the
306
+ machine through a pipe.
307
+ """
308
+ result = ""
309
+ if thoughts and thoughts.strip():
310
+ cleaned_thoughts = re.sub(r'\n{2,}', '\n\t', thoughts.strip())
311
+ result += f"{machine_name}: (thinking) {cleaned_thoughts}\n\n"
312
+
313
+ if text and text.strip():
314
+ cleaned_text = re.sub(r'\n{2,}', '\n\t', text.strip())
315
+ result += f"{machine_name}: {cleaned_text}\n\n"
316
+
317
+ return result
318
+
319
+
320
+ def llm_soup_to_text(llm_response):
321
+ """
322
+ Cleans and transforms text from Large Language Models (LLMs) by:
323
+ - Removing all Markdown formatting (bold, italics, headers, lists, code blocks, links, etc.).
324
+ - Consolidating multiple newlines into a consistent paragraph separator (`\n\t`).
325
+ - Removing extraneous tabs and multiple spaces.
326
+ - Trimming leading/trailing whitespace.
327
+ """
328
+ if not isinstance(llm_response, str):
329
+ return ""
330
+
331
+ text = llm_response
332
+
333
+ # --- Step 0: Unescape HTML Entities First ---
334
+ text = html.unescape(text)
335
+
336
+ # --- Step 1: Normalize Newlines & Initial Cleanup ---
337
+ text = text.replace('\r\n', '\n')
338
+
339
+ # --- Step 2: Remove Fenced Code Blocks completely ---
340
+ # Multiline flag allows ^ to match start of lines.
341
+ text = re.sub(r'^```[^\n]*\n([\s\S]*?)\n```', '', text, flags=re.MULTILINE)
342
+ text = re.sub(r'^~~~[^\n]*\n([\s\S]*?)\n~~~', '', text, flags=re.MULTILINE)
343
+ # Catch ones that might not be at start of line or properly closed with newline
344
+ text = re.sub(r'```[^\n]*\n([\s\S]*?)```', '', text)
345
+ text = re.sub(r'~~~[^\n]*\n([\s\S]*?)~~~', '', text)
346
+
347
+ # --- Step 3: Remove HTML Comments and Tags ---
348
+ text = re.sub(r'<!--[\s\S]*?-->', '', text)
349
+
350
+ # We want <br> to become newline before stripping tags to preserve line breaks
351
+ text = re.sub(r'<br\s*/?>', '\n', text, flags=re.IGNORECASE)
352
+ # Strip other HTML tags, keeping inner text
353
+ text = re.sub(r'<[^>]+>', '', text)
354
+
355
+ # --- Step 4: Remove Block-Level Markdown Elements ---
356
+ # Horizontal rules
357
+ text = re.sub(r'^\s*(?:-|\*|_){3,}\s*$', '', text, flags=re.MULTILINE)
358
+ # Blockquotes: remove leading >
359
+ text = re.sub(r'^\s*>\s*', '', text, flags=re.MULTILINE)
360
+
361
+ # --- Step 5: Remove Headers ---
362
+ # ATX headers (# Header)
363
+ text = re.sub(r'^\s*#{1,6}\s*', '', text, flags=re.MULTILINE)
364
+ # Setext headers (Underlines Header\n===)
365
+ text = re.sub(r'^([^\n]+)\n\s*(?:=|-){2,}\s*$', r'\1', text, flags=re.MULTILINE)
366
+
367
+ # --- Step 6: Remove Links and Images ---
368
+ # Images: ![alt](url) -> remove entire image
369
+ text = re.sub(r'!\[.*?\]\([^\)]*\)', '', text)
370
+ # Links: [text](url) -> keep `text`
371
+ text = re.sub(r'\[([^\]]+)\]\([^\)]*\)', r'\1', text)
372
+
373
+ # --- Step 7: Remove Inline Markdown Elements ---
374
+ text = re.sub(r'`([^`]+)`', r'\1', text)
375
+ # Bold / Italic - ensure we catch both, ordering matters (*** then ** then *)
376
+ text = re.sub(r'\*{3}([^*]+)\*{3}', r'\1', text)
377
+ text = re.sub(r'_{3}([^_]+)_{3}', r'\1', text)
378
+ text = re.sub(r'\*{2}([^*]+)\*{2}', r'\1', text)
379
+ text = re.sub(r'_{2}([^_]+)_{2}', r'\1', text)
380
+ text = re.sub(r'\*([^*]+)\*', r'\1', text)
381
+ text = re.sub(r'_([^_]+)_', r'\1', text)
382
+ # Strikethrough
383
+ text = re.sub(r'~~([^~]+)~~', r'\1', text)
384
+
385
+ # --- Step 8: Remove List Markers ---
386
+ text = re.sub(r'^\s*(?:[-*+]|\d+\.)\s+', '', text, flags=re.MULTILINE)
387
+
388
+ # --- Step 9: Final Whitespace & Paragraph Normalization ---
389
+ # Strip spaces from each line
390
+ lines = [line.strip() for line in text.split('\n')]
391
+ text = '\n'.join(lines)
392
+
393
+ # Convert tabs and multiple spaces to single space
394
+ text = text.replace('\t', ' ')
395
+ text = re.sub(r' {2,}', ' ', text)
396
+
397
+ # Consolidate multiple newlines to paragraph separator `\n\t`
398
+ text = re.sub(r'\n{2,}', '\n\t', text)
399
+
400
+ # --- Step 10: Final Trimming ---
401
+ text = text.strip()
402
+ text = re.sub(r'^[\n\t]+', '', text)
403
+ text = re.sub(r'[\n\t]+$', '', text)
404
+
405
+ return text
406
+
407
+
408
+ def messages_to_mpj(messages):
409
+ contents = []
410
+ for message in messages:
411
+ if message['role'] == 'user':
412
+ obj = dict(role='user', parts=[dict(text=message['content'])])
413
+ elif message['role'] == 'assistant':
414
+ obj = dict(role='model', parts=[dict(text=message['content'])])
415
+ else:
416
+ obj = {}
417
+ contents.append(obj)
418
+ return contents
@@ -0,0 +1,54 @@
1
+ Metadata-Version: 2.4
2
+ Name: collating-machine
3
+ Version: 0.0.1
4
+ Summary: A Machine that collects, verifies, compares and puts in order - collates.
5
+ Author-email: Machina Ratiocinatrix <machina.ratio@gmail.com>, Alexander Fedotov <alex.fedotov@aol.com>
6
+ Project-URL: Homepage, https://github.com/collating-machine/collating-machine
7
+ Keywords: collating-machine
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.10
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: pyyaml==6.0.3
15
+ Dynamic: license-file
16
+
17
+ # Collating-Machine
18
+ A Machine that collects, verifies, compares and puts in order - collates.
19
+
20
+ In order to launch it from the command line or as a Python subprocess:
21
+ ```bash
22
+ echo "Theodotos-Alexandreus: What is the most important fact here, machine?" \
23
+ | uvx collating-machine \
24
+ --provider-api-key sk-proj-... \
25
+ --github-token ghp_...
26
+ ```
27
+
28
+ Or, with a local pip installation:
29
+ ```bash
30
+ pip install collating-machine
31
+ ```
32
+ Set the environment variables:
33
+ ```bash
34
+ export PROVIDER_API_KEY="sk-proj-..."
35
+ export GITHUB_TOKEN="ghp_..."
36
+ ```
37
+ Then:
38
+ ```bash
39
+ collating-machine -a multilogue.txt
40
+ ```
41
+ Or:
42
+ ```bash
43
+ collating-machine multilogue.txt > response.txt
44
+ ```
45
+ Or:
46
+ ```bash
47
+ collating-machine -a multilogue.txt > tmp && mv tmp multilogue.txt
48
+ ```
49
+
50
+ Or use it in your Python code:
51
+ ```Python
52
+ # Python
53
+ import collating_machine
54
+ ```
@@ -0,0 +1,22 @@
1
+ collating_machine/__init__.py,sha256=7OUS-rxlkIeI981oLwHdfSX2hH3olbE-gvyXT4YDZn4,522
2
+ collating_machine/cli.py,sha256=FrL7rm6qy5uwQ3EFPxChaHcXMJWLa-a-psOEe2UYjgg,5784
3
+ collating_machine/config.py,sha256=PFhpe7DMygkx-R1i3dhUl0p0h3xKb71pA-lZ9gV0gZo,1507
4
+ collating_machine/githf.py,sha256=O7kMfICPhIhpXTg4NQOs2Y9yX9j0K8-eObdGo_rwthY,2295
5
+ collating_machine/machina.yaml,sha256=kHcqqoymvLJ6m79fAC3DgNTx39IArY5eysl5zEQzATs,470
6
+ collating_machine/machine.py,sha256=X7gHYFh5kcRw0gIuZtl3buSyg5D9FZ56DtThJnf6RJ8,6178
7
+ collating_machine/utilities.py,sha256=IAqnSHb836pZTbWLPX4oHJh6TrSJNXUTLn9RemuMpos,13935
8
+ collating_machine/providers/__init__.py,sha256=4lVYZ3KJxbhfVGwdKCgEkaKd7oW9EXLbkDZNcN_dRlY,190
9
+ collating_machine/providers/basta.py,sha256=VGPR1ZeaTzQNtYiVwHQlklTyfw5WcBEaph4jcbZo75k,2626
10
+ collating_machine/providers/camelids.py,sha256=8RdA6A0Sutbresk0O2eHx0107Vzc0zom1ZQMS1EnWc0,3200
11
+ collating_machine/providers/castor_pollux.py,sha256=yAAknPujuBR-uZHOh1pjm7-E116dOrMMV71MTuK_69g,4524
12
+ collating_machine/providers/depsek.py,sha256=Hiy30TB4i4vIIgzbvD51LzTOPxPMhgm9bQxLjsMGDVU,2604
13
+ collating_machine/providers/electroid.py,sha256=YtU69oz9Cs0RiwhjlmFZdXXhgEEJELHn7Y5fgwvp2aI,2497
14
+ collating_machine/providers/openai.py,sha256=1mkerve3L2nikv-pMS6g9FKbCMANNVH2eFdAFWzLQMw,2600
15
+ collating_machine/providers/qrog.py,sha256=scs__bOBy9pPubmVjlHRZI0ykzo1CNxIuM8kDy7MThY,2573
16
+ collating_machine/providers/strangelove.py,sha256=ww_OhG-Jh4e1b1aC9ax33a87_K7xI0BxzYd1m6AECHg,2446
17
+ collating_machine-0.0.1.dist-info/licenses/LICENSE,sha256=BN2PMVuQzswbex0foJhVXSY4XI_Ud4o-Xc9knovYUeU,1074
18
+ collating_machine-0.0.1.dist-info/METADATA,sha256=yzF9xJvW2UnKpOM0FRYslT6MpBrij6Lu20Viq1eI0io,1462
19
+ collating_machine-0.0.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
20
+ collating_machine-0.0.1.dist-info/entry_points.txt,sha256=rQJNTO8EeG8oZAWa6A7KQUw2Ou4qlFdA546JlnIz3NI,64
21
+ collating_machine-0.0.1.dist-info/top_level.txt,sha256=-A0JFpsn4WMVLOzVZbpM9FAiEFuopqQX9fXz-6slE8M,18
22
+ collating_machine-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ collating-machine = collating_machine.cli:run
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Alexander Fedotov
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ collating_machine