bioLOLPython 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1049 @@
1
+ #!/usr/bin/env python
2
+
3
+ """
4
+ #
5
+ # Additional features by Chen Hsieh (2025):
6
+ # - Updated codebase to Python 3
7
+ # - Meme-style biological sequence interpreter ("bioLOLCODE")
8
+ # - Commands added: DNA GO ..., REVERSE THAT ..., GC BOMB ..., TRANSCRIBE, TRANSLATE
9
+ # - Fun science ops: ALIGN A WIT B, I CRAVE VIOLENCE, etc.
10
+ # - Uses Biopython under the hood
11
+ #
12
+ # Inspired by LOLCODE and Gen Z chaos, powered by 🧬
13
+ #
14
+ # Project repo: https://github.com/ChenHsieh/bioLOLPython
15
+
16
+
17
+ Implementation of the LOLPython language.
18
+ Converts from LOLPython to Python then optionally runs the Python.
19
+
20
+ This package depends on PLY -- http://www.dabeaz.com/ply/
21
+
22
+ Written by Andrew Dalke <dalke@dalkescientific.com>
23
+ Dalke Scientific Software, LLC
24
+ 1 June 2007, Gothenburg, Sweden
25
+
26
+ This software is in the public domain. For details see:
27
+ http://creativecommons.org/licenses/publicdomain/
28
+
29
+
30
+ """
31
+
32
+
33
+ import sys
34
+ import keyword
35
+ import os
36
+ import types
37
+ from io import StringIO
38
+ from ply import lex
39
+ from bioLOLPython.bio.sequence import BioSeq
40
+ import codecs
41
+ import re
42
+ import random
43
+
44
# Module-level interpreter state for the bioLOLCODE extension:
# a registry of BioSeq objects keyed by their LOLPython variable name
# (e.g. BRRRR), shared by _execute_command / handle_line below.
bio_vars = {} # Store sequences by name, e.g., BRRRR
__NAME__ = "lolpython"
__VERSION__ = "1.0"
47
+
48
+ # Translating LOLPython tokens to Python tokens
49
+ # This could be cleaned up. For example, some of
50
+ # these tokens could be merged into one.
51
+
52
# Token types emitted by the lexer.  PLY requires this module-level tuple.
tokens = (
    "NAME",       # variable names
    "RESERVED",   # Used for Python reserved names
    "NUMBER",     # Integers and floats
    "STRING",
    "OP",         # Like the Python OP
    "CLOSE",      # Don't really need this..

    "COMMENT",
    "AUTOCALL",   # write t.value then add '('
    "INLINE",     # write t.value directly
    "FUTURE",     # for the "I FUTURE CAT WITH" statement
    "PRINT",      # VISIBLE -> stdout or COMPLAIN -> stderr

    "ENDMARKER",
    "COLON",
    "WS",
    "NEWLINE",
    )
71
+
72
+
73
+ # Helper functions for making given token types
74
def OP(t, value):
    """Retag token *t* as an OP carrying *value*; returns *t*."""
    t.type, t.value = "OP", value
    return t
78
+
79
def RESERVED(t, value):
    """Retag token *t* as a RESERVED word carrying *value*; returns *t*."""
    t.type, t.value = "RESERVED", value
    return t
83
+
84
def AUTOCALL(t, value):
    """Retag token *t* as an AUTOCALL carrying *value*; returns *t*.

    The converter writes ``t.value`` followed by an opening "(", so a
    matching ")" is pushed onto the lexer's bracket stack for a later
    CLOSE token to consume.

    Bug fix: the original ignored the *value* argument and always set
    ``t.value = "tuple"``.  The only in-file caller (t_list) passes
    "tuple", so using the parameter is behavior-compatible while making
    the helper usable for other callables.
    """
    t.type = "AUTOCALL"
    t.value = value
    t.lexer.paren_stack.append(")")
    return t
89
+
90
def INLINE(t, value):
    """Retag token *t* as INLINE text carrying *value*; returns *t*."""
    t.type, t.value = "INLINE", value
    return t
94
+
95
+ #####
96
+
97
+ # ply uses a large regex for token detection, and sre is limited to
98
+ # 100 groups. This grammar pushes the limit. I use (?:non-grouping)
99
+ # parens to keep the count down.
100
+
101
+
102
# "CAN HAS" / "CAN HAZ" is LOLPython assignment.
def t_ASSIGN(t): # cannot be a simple pattern because it must
    r'CAN[ ]+HA[SZ]\b' # come before the t_NAME definition
    return OP(t, "=")
105
+
106
def t_SINGLE_QUOTE_STRING(t):
    r"'([^\\']+|\\'|\\\\)*'"  # I think this is right ...
    # Drop the surrounding quotes, then interpret backslash escapes.
    inner = t.value[1:-1]
    t.type = "STRING"
    t.value = codecs.decode(inner, "unicode_escape")
    return t

def t_DOUBLE_QUOTE_STRING(t):
    r'"([^\\"]+|\\"|\\\\)*"'
    inner = t.value[1:-1]
    t.type = "STRING"
    t.value = codecs.decode(inner, "unicode_escape")
    return t

# LOL-quoted strings terminate at /LOL.  The delimiter itself can
# never appear inside the string body.
def t_LOL_STRING(t):
    r"LOL[ ]*((?!/LOL).|\n)*[ ]*/LOL"
    t.type = "STRING"
    body = t.value[3:-4]          # strip the LOL ... /LOL delimiters
    t.value = body.strip(" ")
    return t
125
+
126
# Aliases for the same thing - for extra cuteness.
# Each opener pushes its matching closer so a later CLOSE token
# (OK / !) can synthesize the right closing bracket(s).
def t_LSQUARE(t):
    r"(?:SOME|LOOK[ ]AT|LET[ ]+THE)\b"
    t.lexer.paren_stack.append(']')
    return OP(t, "[")

def t_LPAREN(t):
    r"(?:WIT|THEZ)\b"
    t.lexer.paren_stack.append(')')
    return OP(t, "(")

def t_LBRACE(t):
    r"BUCKET\b"
    t.lexer.paren_stack.append("}")
    return OP(t, "{")
141
+
142
def t_CLOSE(t):
    r"(?:OK(!+|\b)|!+)"
    # "OK" closes one bracket and each extra "!" closes one more:
    # OK -> 1, OK! -> 2, OK!! -> 3; bare ! -> 1, !! -> 2.
    stack = t.lexer.paren_stack
    num_closes = len(t.value) - (1 if t.value.startswith("OK") else 0)
    if len(stack) < num_closes:
        raise AssertionError("not enough opens on the stack: line %d"
                             % (t.lineno,))
    # Emit the pending closers innermost-first, then drop them.
    closers = stack[-num_closes:]
    del stack[-num_closes:]
    t.value = "".join(reversed(closers))
    return t
156
+
157
# Multi-word operator and keyword aliases.  Each rule's docstring IS its
# regex (PLY convention) and must not change; PLY tries function rules
# in definition order, so longer phrases are defined before their
# prefixes (e.g. KINDA LIKE before NOT LIKE handling in t_NAME).

def t_EQ(t):
    r"KINDA[ ]+LIKE\b"
    return OP(t, "==")

def t_NE(t):
    r"(?:KINDA[ ]+)?NOT[ ]+LIKE\b"
    return OP(t, "!=")

def t_is(t):
    r"KINDA[ ]+IS\b"
    return RESERVED(t, "is")

def t_GT(t):
    r"ATE[ ]+MORE[ ]+CHEEZBURGERS?[ ]+THAN\b"
    return OP(t, ">")

def t_LT(t):
    r"ATE[ ]+FEWER[ ]+CHEEZBURGERS?[ ]+THAN\b"
    return OP(t, "<")

def t_GTE(t):
    r"BIG[ ]+LIKE\b"
    return OP(t, ">=")

def t_LTE(t):
    r"SMALL[ ]+LIKE\b"
    return OP(t, "<=")

def t_RETURN(t):
    r"U[ ]+TAKE\b"
    return RESERVED(t, "return")

def t_yield(t):
    r"U[ ]+BORROW\b"
    return RESERVED(t, "yield")

def t_ELIF(t):
    r"OR[ ]+IZ\b"
    return RESERVED(t, "elif")

def t_ELSE(t):
    r"(?:(?:I[ ]+GIVE[ ]+UP|IZ[ ]+KEWL|ALL[ ]+DONE)|NOPE)\b"
    return RESERVED(t, "else")

# "?" plays the role of Python's ":" block introducer.
def t_COLON(t):
    r"\?"
    t.value = ":"
    return t

def t_FROM(t):
    r"IN[ ]+MAI\b"
    return RESERVED(t, "from")

def t_EXCEPT(t):
    r"O[ ]+NOES\b"
    return RESERVED(t, "except")

def t_PLUS(t):
    r"ALONG[ ]+WITH\b"
    return OP(t, "+")

def t_MINUS(t):
    r"TAKE[ ]+AWAY\b"
    return OP(t, "-")

def t_PLUS_EQUAL(t):
    r"GETZ[ ]+ANOTHR\b"
    return OP(t, "+=")

def t_MINUS_EQUAL(t):
    r"THROW[SZ]?[ ]+AWAY\b"
    return OP(t, "-=")

def t_DIV(t):
    r"SMASHES[ ]+INTO\b"
    return OP(t, "/")

def t_DIV_EQUAL(t):
    r"SMASHES[ ]+INTO[ ]+HAS\b"
    return OP(t, "/=")

# NOTE(review): despite the name, this emits "//" (floor division);
# confirm the intended semantics before renaming anything -- the
# function name itself affects PLY rule ordering.
def t_TRUEDIV(t):
    r"SMASHES[ ]+NICELY[ ]+INTO\b"
    return OP(t, "//")

def t_MUL(t):
    r"OF[ ]THOSE\b"
    return OP(t, "*")

def t_MUL_EQUAL(t):
    r"COPIES[ ]+(?:HIM|HER|IT)SELF[ ]+BY\b"
    return OP(t, "*=")

def t_POW(t):
    r"BY[ ]+GRAYSKULL[ ]+POWER"
    return OP(t, "**")

# Emitted through OP even though "in" is a keyword; the converter
# writes OP values verbatim, so the generated Python is the same.
def t_IN(t):
    r"IN[ ]+(?:UR|THE|THIS)\b"
    return OP(t, "in")

def t_del(t):
    r"DO[ ]+NOT[ ]+WANT\b"
    return RESERVED(t, "del")

def t_and(t):
    r"\&"
    return RESERVED(t, "and")

def t_or(t):
    r"OR[ ]+MABEE\b"
    return RESERVED(t, "or")

def t_pass(t):
    r"I[ ]+IZ[ ]+CUTE\b"
    return RESERVED(t, "pass")

def t_forever(t):
    r"WHILE[ ]+I[ ]+CUTE\b"
    return INLINE(t, "while 1")

def t_def(t):
    r"SO[ ]+IM[ ]+LIKE\b"
    return RESERVED(t, "def")

# NOTE(review): the trailing "[ ]\b" looks odd (a literal space then a
# word boundary) -- confirm the pattern matches what was intended.
def t_class(t):
    r"ME[ ]+MAKE[ ]\b"
    return RESERVED(t, "class")

# "I FUTURE CAT WITH x" becomes "from __future__ import x"; the
# converter redirects output to the header when it sees FUTURE.
def t_future(t):
    r"I[ ]+FUTURE[ ]+CAT[ ]+WITH\b"
    t.type = "FUTURE"
    return t

def t_assert(t):
    r"SO[ ]+GOOD\b"
    return RESERVED(t, "assert")

def t_assert_not(t):
    r"AINT[ ]+GOOD\b"
    return INLINE(t, "assert not ")

def t_for(t):
    r"GIMME[ ]+EACH\b"
    return RESERVED(t, "for")

# "ALL OF ..." wraps the following expression in tuple(...).
def t_list(t):
    r"ALL[ ]+OF\b"
    return AUTOCALL(t, "tuple")
305
+
306
# Single-word aliases resolved inside t_NAME.  Each entry maps a
# LOLPython word onto the (token type, emitted value) pair the
# converter should use.
RESERVED_VALUES = {
    # numeric pet names
    "EASTERBUNNY": ("NUMBER", "0"),
    "CHEEZBURGER": ("NUMBER", "1"),
    "CHOKOLET": ("NUMBER", "-1"),
    "TWIN": ("NUMBER", "2"),
    "TWINZ": ("NUMBER", "2"),
    "TWINS": ("NUMBER", "2"),
    "EVILTWIN": ("NUMBER", "-2"),
    "EVILTWINZ": ("NUMBER", "-2"),
    "EVILTWINS": ("NUMBER", "-2"),
    "ALLFINGERZ": ("NUMBER", "10"),
    "TOEZ": ("NUMBER", "-10"),
    "ONE": ("NUMBER", "1"),
    "ONCE": ("NUMBER", "1"),
    "TWO": ("NUMBER", "2"),
    "TWICE": ("NUMBER", "2"),
    "THR33": ("NUMBER", "3"),
    "FOUR": ("NUMBER", "4"),
    "FIV": ("NUMBER", "5"),
    "SIKS": ("NUMBER", "6"),
    "SEVN": ("NUMBER", "7"),
    "ATE": ("NUMBER", "8"),
    "NINE": ("NUMBER", "9"),
    # booleans and I/O
    "MEH": ("NAME", "False"),
    "YEAH": ("NAME", "True"),
    "VISIBLE": ("PRINT", "stdout"),
    "COMPLAIN": ("PRINT", "stderr"),
    "AND": ("OP", ","),
    "BLACKHOLE": ("RESERVED", "ZeroDivisionError"),
    "DONOTLIKE": ("AUTOCALL", "AssertionError"),

    "ANTI": ("OP", "-"),
    "IZ": ("RESERVED", "if"),
    "GIMME": ("RESERVED", "import"),
    "LIKE": ("RESERVED", "as"),
    "OWN": ("OP", "."),

    # exception handling / loop control
    "PLZ": ("RESERVED", "try"),
    "HALP": ("RESERVED", "raise"),
    "WHATEVER": ("RESERVED", "finally"),
    "KTHX": ("RESERVED", "continue"),
    "KTHXBYE": ("RESERVED", "break"),

    "OVER": ("OP", "/"),

    "AINT": ("RESERVED", "not"),
    "ME": ("RESERVED", "self"),

    # builtin calls written as WORD(  by the converter
    "STRING": ("AUTOCALL", "str"),
    "NUMBR": ("AUTOCALL", "int"),
    "BIGNESS": ("AUTOCALL", "len"),
    "NUMBRZ": ("AUTOCALL", "range"),
    "ADDED": ("AUTOCALL", ".append"),

    # literal text spliced straight into the output
    "ARGZ": ("INLINE", "_lol_sys.argv"),
    "THINGZ": ("INLINE", "()"), # invisible tuple didn't sound right
    "THING": ("INLINE", "()"), # sometimes it's better in singular form
    "MY": ("INLINE", "self."),
    "MYSELF": ("INLINE", "(self)"),

    "EVEN": ("INLINE", "% 2 == 0"),
    "ODD": ("INLINE", "% 2 == 1"),
    "WIF": ("RESERVED", "with"),
    "ITSLIKETHIS":("RESERVED","class")
    }
371
+
372
def t_FLOAT(t):
    r"""(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]? \d+)?"""
    # Numeric literals pass through as text; just retag them.
    t.type = "NUMBER"
    return t

def t_INT(t):
    r"\d+"
    t.type = "NUMBER"
    return t
382
+
383
def t_INVISIBLE(t):
    r"INVISIBLE([ ]+(LIST|STRING|BUCKET))?\b"
    # "INVISIBLE <kind>" maps to the matching empty literal; bare
    # INVISIBLE is None.  Check order mirrors the original chain.
    for word, literal in (("LIST", "[]"), ("STRING", '""'), ("BUCKET", "{}")):
        if word in t.value:
            t.type = "INLINE"
            t.value = literal
            return t
    t.type = "RESERVED"
    t.value = "None"
    return t
397
+
398
# Not consuming the newline. Needed for "IZ EASTERBUNNY? BTW comment".
# BTW and WTF both introduce a comment running to end of line.
def t_COMMENT(t):
    r"[ ]*(?:BTW|WTF)[^\n]*"
    return t
402
+
403
def t_NAME(t):
    r'[a-zA-Z_][a-zA-Z0-9_]*'
    # Bare words are names unless listed in the alias table, in which
    # case the stored (type, value) pair replaces them outright.
    replacement = RESERVED_VALUES.get(t.value)
    if replacement is not None:
        t.type, t.value = replacement
        if t.type == "AUTOCALL":
            # AUTOCALL opens a "(" in the output; remember its closer.
            t.lexer.paren_stack.append(")")
    return t
412
+
413
def t_WS(t):
    r' [ ]+ '
    # Leading whitespace matters only for indentation tracking, so it
    # is emitted only at line start and outside brackets.  (PLY compiles
    # rule docstrings with re.VERBOSE, so the spaces around [ ]+ in the
    # pattern are ignored.)
    if t.lexer.at_line_start and not t.lexer.paren_stack:
        return t


# Don't generate newline tokens when inside of parens
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)
    t.type = "NEWLINE"
    if not t.lexer.paren_stack:
        return t


def t_error(t):
    # Any character not matched by a rule is a hard syntax error.
    raise SyntaxError("Unknown symbol %r" % (t.value[0],))
430
+
431
+
432
+
433
+ ## I implemented INDENT / DEDENT generation as a post-processing filter
434
+
435
+ # The original lex token stream contains WS and NEWLINE characters.
436
+ # WS will only occur before any other tokens on a line.
437
+
438
+ # I have three filters. One tags tokens by adding two attributes.
439
+ # "must_indent" is True if the token must be indented from the
440
+ # previous code. The other is "at_line_start" which is True for WS
441
+ # and the first non-WS/non-NEWLINE on a line. It flags the check so
442
+ # see if the new line has changed indication level.
443
+
444
+ # Python's syntax has three INDENT states
445
+ # 0) no colon hence no need to indent
446
+ # 1) "if 1: go()" - simple statements have a COLON but no need for an indent
447
+ # 2) "if 1:\n go()" - complex statements have a COLON NEWLINE and must indent
448
+
449
NO_INDENT = 0    # no colon seen; the next line must not indent
MAY_INDENT = 1   # colon seen; "IZ x? y" style one-liners need no indent
MUST_INDENT = 2  # colon followed by newline; the next statement must indent
452
+
453
# only care about whitespace at the start of a line
def track_tokens_filter(lexer, tokens):
    """Tag every token with `at_line_start` and `must_indent` so the
    downstream indentation filter can decide where INDENT/DEDENT go.

    Mirrors a Python-style grammar: a COLON arms MAY_INDENT, a NEWLINE
    after a COLON promotes it to MUST_INDENT, and the first real token
    afterwards is the one that must be indented.
    """
    lexer.at_line_start = at_line_start = True
    indent = NO_INDENT

    for token in tokens:
        token.at_line_start = at_line_start

        if token.type == "COLON":
            at_line_start = False
            indent = MAY_INDENT
            token.must_indent = False

        elif token.type == "NEWLINE":
            at_line_start = True
            if indent == MAY_INDENT:
                indent = MUST_INDENT
            token.must_indent = False

        elif token.type == "WS":
            # WS is only emitted at line start (see t_WS).
            assert token.at_line_start == True
            at_line_start = True
            token.must_indent = False

        elif token.type == "COMMENT":
            # Comments do not affect indentation state at all.
            pass

        else:
            # A real token; only indent after COLON NEWLINE
            if indent == MUST_INDENT:
                token.must_indent = True
            else:
                token.must_indent = False
            at_line_start = False

            indent = NO_INDENT

        yield token
        lexer.at_line_start = at_line_start
492
+
493
def _new_token(type, lineno):
    """Build a synthetic LexToken (no value, no real lexpos)."""
    tok = lex.LexToken()
    tok.type, tok.value = type, None
    tok.lineno, tok.lexpos = lineno, -1
    return tok
500
+
501
# Synthesize a DEDENT tag
def DEDENT(lineno):
    return _new_token("DEDENT", lineno)

# Synthesize an INDENT tag
def INDENT(lineno):
    return _new_token("INDENT", lineno)
508
+
509
+
510
+ # Track the indentation level and emit the right INDENT / DEDENT events.
511
def indentation_filter(tokens):
    """Consume WS/NEWLINE bookkeeping tokens and interleave synthetic
    INDENT/DEDENT tokens around real ones, Python-style.

    Expects tokens already tagged by track_tokens_filter.  Raises
    IndentationError on a missing, unexpected, or inconsistent indent.
    """
    # A stack of indentation levels; will never pop item 0
    levels = [0]
    token = None
    depth = 0
    prev_was_ws = False
    for token in tokens:
        # (debug hook: print token, token.at_line_start, token.must_indent)

        # WS only occurs at the start of the line
        # There may be WS followed by NEWLINE so
        # only track the depth here. Don't indent/dedent
        # until there's something real.
        if token.type == "WS":
            assert depth == 0
            depth = len(token.value)
            prev_was_ws = True
            # Don't forward WS to the parser
            continue

        if token.type == "NEWLINE":
            depth = 0
            if prev_was_ws or token.at_line_start:
                # ignore blank lines
                continue
            # pass the other cases on through
            yield token
            continue

        if token.type == "COMMENT":
            # Comments pass through without touching the level stack.
            yield token
            continue

        # then it must be a real token (not WS, not NEWLINE)
        # which can affect the indentation level

        prev_was_ws = False
        if token.must_indent:
            # The current depth must be larger than the previous level
            if not (depth > levels[-1]):
                raise IndentationError("expected an indented block")

            levels.append(depth)
            yield INDENT(token.lineno)

        elif token.at_line_start:
            # Must be on the same level or one of the previous levels
            if depth == levels[-1]:
                # At the same level
                pass
            elif depth > levels[-1]:
                raise IndentationError("indentation increase but not in new block")
            else:
                # Back up; but only if it matches a previous level
                try:
                    i = levels.index(depth)
                except ValueError:
                    raise IndentationError("inconsistent indentation")
                for _ in range(i+1, len(levels)):
                    yield DEDENT(token.lineno)
                    levels.pop()

        yield token

    ### Finished processing ###

    # Must dedent any remaining levels
    if len(levels) > 1:
        assert token is not None
        for _ in range(1, len(levels)):
            yield DEDENT(token.lineno)
588
+
589
+
590
# The top-level filter adds an ENDMARKER, if requested.
# Python's grammar uses it.
def token_filter(lexer, add_endmarker = True):
    """Chain the tagging and indentation filters over *lexer*'s tokens,
    optionally appending a final ENDMARKER token."""
    token = None
    tokens = iter(lexer.token, None)
    tokens = track_tokens_filter(lexer, tokens)
    for token in indentation_filter(tokens):
        yield token

    if add_endmarker:
        # Reuse the last token's line number when there was any input.
        lineno = 1
        if token is not None:
            lineno = token.lineno
        yield _new_token("ENDMARKER", lineno)
604
+
605
class LOLLexer(object):
    """Thin wrapper around the PLY lexer that layers the tagging,
    indentation, and ENDMARKER filters on top of raw tokenization."""

    def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
        self.lexer = lex.lex(debug=debug, optimize=optimize,
                             lextab=lextab, reflags=reflags)
        self.token_stream = None

    def input(self, s, add_endmarker=True):
        # Fresh bracket stack per input; the token rules push/pop it.
        self.lexer.paren_stack = []
        self.lexer.input(s)
        self.token_stream = token_filter(self.lexer, add_endmarker)

    def token(self):
        # PLY-style interface: return None at end of input.
        try:
            return next(self.token_stream)
        except StopIteration:
            return None
619
+
620
# Helper class to generate logically correct indented Python code
class IndentWriter(object):
    """Write text to *outfile*, emitting one leading space per current
    indent level at the start of each output line."""

    def __init__(self, outfile):
        self.outfile = outfile
        self.at_first_column = True
        self.indent = 0

    def write(self, text):
        if self.at_first_column:
            self.at_first_column = False
            self.outfile.write(" " * self.indent)
        self.outfile.write(text)
631
+
632
# Split things up because the from __future__ statements must
# go before any other code.  HEADER collects __future__ imports;
# BODY receives the translated program.

HEADER = """# LOLPython to Python converter version 1.0
# Written by Andrew Dalke, who should have been working on better things.

"""

BODY = """
# sys is used for COMPLAIN and ARGZ
import sys as _lol_sys

"""
645
+
646
def to_python(s):
    """Convert LOLPython source text *s* into Python source text.

    Returns the header (any __future__ imports) concatenated with the
    translated body.  Output is written through IndentWriter so that
    INDENT/DEDENT tokens control the emitted indentation.
    """
    L = LOLLexer()
    L.input(s)

    header = StringIO()
    header.write(HEADER)
    header_output = IndentWriter(header)

    body = StringIO()
    body.write(BODY)
    body_output = IndentWriter(body)

    write = body_output.write
    output = body_output

    for t in iter(L.token_stream):
        if t.type == "NAME":
            # Need to escape names which are Python variables Do that
            # by appending an "_". But then I also need to make sure
            # that "yield_" does not collide with "yield". And you
            # thought you were being clever trying to use a Python
            # variable. :)
            name = t.value.rstrip("_")
            if name in keyword.kwlist:
                write(t.value + "_ ")
            else:
                write(t.value + " ")

        elif t.type in ("RESERVED", "OP", "NUMBER", "CLOSE"):
            # While not pretty, I'll put a space after each
            # term because it's the simplest solution. Otherwise
            # I'll need to track the amount of whitespace between
            # the tokens in the original text.
            write(t.value+" ")

        # XXX escape names which are special in Python!
        elif t.type == "STRING":
            write(repr(t.value) + " ")

        elif t.type == "COMMENT":
            # Not enough information to keep comments on the correct
            # indentation level. This is good enough. Ugly though.
            # Maybe I need to fix the tokenizer.
            # NOTE(review): t.value[3:] assumes the comment starts at
            # "BTW"/"WTF" with no leading spaces -- t_COMMENT allows
            # leading blanks; confirm.
            write("#"+ t.value[3:]+"\n")
            output.at_first_column = True

        elif t.type == "COLON":
            write(":")

        elif t.type == "INDENT":
            output.indent += 1
            pass

        elif t.type == "DEDENT":
            output.indent -= 1
            pass

        elif t.type == "NEWLINE":
            write(t.value)
            output.at_first_column = True
            # A FUTURE statement redirects output to the header for one
            # line; every newline resets it back to the body.
            output = body_output
            write = output.write

        elif t.type == "PRINT":
            # NOTE(review): this emits Python-2-style print text.
            # "print x" is a syntax error under the Python 3 exec used
            # by execstring, and the stderr form writes a literal
            # "print(..., file=...)" followed by the arguments -- also
            # not a valid call.  Confirm intended/known limitation.
            if t.value == "stdout":
                write("print ")
            elif t.value == "stderr":
                write("print(..., file=_lol_sys.stderr) ")
            else:
                raise AssertionError(t.value)

        elif t.type == "AUTOCALL":
            write(t.value + "(")

        elif t.type == "INLINE":
            write(t.value)

        elif t.type == "ENDMARKER":
            write("\n# The end.\n")

        elif t.type == "WS":
            # NOTE(review): indentation_filter never forwards WS, so
            # this branch looks unreachable; leading_ws is not read
            # anywhere in this file.
            output.leading_ws = t.value

        elif t.type == "FUTURE":
            # Write to the header. This is a hack. Err, a hairball.
            output = header_output
            write = output.write
            write("from __future__ import ")

        else:
            raise AssertionError(t.type)

    return header.getvalue() + body.getvalue()
739
+
740
+
741
+ # API code for doing the translation and exec'ing the result
742
+
743
+ #####################################
744
+ # bioLOLCODE Extension Commands 🧬😹
745
+ # Added by Chen Hsieh, 2025
746
+ # - Meme-style DNA sequence tools
747
+ # - Dialect system for internet slang eras
748
+ #####################################
749
+ from bioLOLPython.dialects import get_dialect, list_dialects, dialect_names
750
+
751
_current_dialect_name = "lolcat"  # active dialect; change via set_dialect()
752
+
753
+
754
def set_dialect(name):
    """Make *name* the active dialect for subsequent handle_line calls.

    Propagates whatever get_dialect raises when the name is unknown.
    """
    global _current_dialect_name
    get_dialect(name)  # validate it exists
    _current_dialect_name = name
758
+
759
+
760
def _get_var(name):
    """Look up *name* in the bio_vars registry; print an error and
    return None when it is not defined."""
    try:
        return bio_vars[name]
    except KeyError:
        print(f"❌ Variable '{name}' not defined.")
        return None
765
+
766
+
767
def _parse_visible(msg):
    """Parse print message: split on +, strip quotes, resolve variables."""
    pieces = []
    for raw in msg.split("+"):
        token = raw.strip()
        quoted = (token.startswith('"') and token.endswith('"')) or \
                 (token.startswith("'") and token.endswith("'"))
        if quoted:
            pieces.append(token[1:-1])
        elif token in bio_vars:
            pieces.append(str(bio_vars[token]))
        else:
            pieces.append(token)
    return "".join(pieces)
781
+
782
+
783
+ def _score_rating(score):
784
+ ratings = ("meh", "ok", "slay", "SHEEESH")
785
+ return ratings[min(int(score) // 5, 3)]
786
+
787
+
788
def _execute_command(cmd_name, match, dialect):
    """Execute a bio command using dialect-specific response templates.

    cmd_name -- key into dialect["commands"]; match -- the re.Match that
    selected this command; dialect -- dialect dict from get_dialect().
    Results are printed and/or stored back into bio_vars; returns None.
    """
    cmd_def = dialect["commands"][cmd_name]

    if cmd_name == "init":
        print(cmd_def.get("greeting", ""))
        return

    if cmd_name == "end":
        return

    if cmd_name == "declare":
        # Bind a new BioSeq to the named variable; BioSeq validates.
        name = match.group("name")
        seq = match.group("seq")
        try:
            bio_vars[name] = BioSeq(seq)
        except ValueError as e:
            print(f"❌ {e}")
        return

    if cmd_name == "reverse":
        # NOTE(review): `if var:` also skips empty sequences, not just
        # undefined names -- confirm intended (same pattern below).
        name = match.group("name")
        var = _get_var(name)
        if var:
            bio_vars[name] = BioSeq(var.reverse_complement())
        return

    if cmd_name == "gc_content":
        name = match.group("name")
        var = _get_var(name)
        if var:
            gc = var.gc_content()
            template = cmd_def.get("response", "GC content: {value:.2f}%")
            rating = _score_rating(gc / 10)  # 0-100% -> rating
            print(template.format(value=gc, rating=rating))
        return

    if cmd_name == "print":
        # The print pattern matches just the keyword; the rest of the line is the message
        # NOTE: this local deliberately shadows the module-level `keyword` import.
        keyword = cmd_def["pattern"]
        # Find where the keyword ends in the original line
        msg = match.string[match.end():].strip()
        if not msg:
            # Try getting everything after the keyword
            idx = match.string.upper().find(keyword.upper())
            if idx >= 0:
                msg = match.string[idx + len(keyword):].strip()
        print(_parse_visible(msg))
        return

    if cmd_name == "transcribe":
        # DNA -> RNA via Biopython's Seq.transcribe.
        name = match.group("name")
        var = _get_var(name)
        if var:
            bio_vars[name] = BioSeq(str(var.seq.transcribe()))
        return

    if cmd_name == "translate":
        # Nucleotides -> protein; validate=False since the result is
        # amino acids, not bases.
        name = match.group("name")
        var = _get_var(name)
        if var:
            bio_vars[name] = BioSeq(str(var.seq.translate()), validate=False)
        return

    if cmd_name == "align":
        try:
            from Bio.Align import PairwiseAligner
            name1 = match.group("a")
            name2 = match.group("b")
            if name1 not in bio_vars or name2 not in bio_vars:
                print("❌ One or both sequences not defined.")
                return
            seq1 = str(bio_vars[name1])
            seq2 = str(bio_vars[name2])

            aligner = PairwiseAligner()
            aligner.mode = "global"
            alignments = aligner.align(seq1, seq2)

            if not alignments:
                print("⚠️ No alignment found.")
                return

            # Show the best alignment plus its rated score.
            print(alignments[0].format())
            score = alignments[0].score
            template = cmd_def.get("response", "Score: {score:.2f} {rating}")
            print(template.format(score=score, rating=_score_rating(score)))
        except Exception as e:
            print(f"❌ Error during alignment: {e}")
        return

    if cmd_name == "mutate":
        # Replace one random base with a different base from ACGTU.
        name = match.group("name")
        var = _get_var(name)
        if not var:
            return
        original = str(var)
        mutated = list(original)
        if not mutated:
            print("⚠️ sequence is empty, nothing to mutate")
            return
        index = random.randint(0, len(mutated) - 1)
        bases = "ACGTU"
        original_base = mutated[index]
        choices = [b for b in bases if b != original_base.upper()]
        mutated[index] = random.choice(choices)
        bio_vars[name] = BioSeq("".join(mutated))
        template = cmd_def.get("response", "mutated {name} at pos {pos}: {old} ➜ {new}")
        print(template.format(name=name, pos=index + 1, old=original[index], new=mutated[index]))
        return

    if cmd_name == "length":
        name = match.group("name")
        var = _get_var(name)
        if var:
            template = cmd_def.get("response", "{name} is {value} bases long")
            print(template.format(name=name, value=var.length()))
        return

    if cmd_name == "complement":
        name = match.group("name")
        var = _get_var(name)
        if var:
            bio_vars[name] = BioSeq(var.complement())
        return

    if cmd_name == "find_orf":
        name = match.group("name")
        var = _get_var(name)
        if var:
            orf = var.find_orf()
            if orf:
                template = cmd_def.get("orf_found", "ORF found: {orf} ({length} bp)")
                print(template.format(orf=orf, length=len(orf)))
            else:
                template = cmd_def.get("orf_missing", "No ORF found in {name}")
                print(template.format(name=name))
        return

    if cmd_name == "motif":
        name = match.group("name")
        pattern = match.group("pattern")
        var = _get_var(name)
        if var:
            matches = var.find_motif(pattern)
            if matches:
                template = cmd_def.get("found", "Found {count} match(es):")
                print(template.format(count=len(matches), pattern=pattern, name=name))
                pos_template = cmd_def.get("position", " pos {start}-{end}")
                for start, end in matches:
                    print(pos_template.format(start=start, end=end))
            else:
                template = cmd_def.get("not_found", "No matches for '{pattern}' in {name}")
                print(template.format(pattern=pattern, name=name))
        return
943
+
944
+
945
def handle_line(line, dialect_name=None):
    """Process a single bioLOLPython command line using the specified dialect.

    *line* should already be stripped (see main); blank lines and BTW
    comments are ignored.  Falls back to the active module-level
    dialect when *dialect_name* is None.
    """
    if dialect_name is None:
        dialect_name = _current_dialect_name

    if not line or line.startswith("BTW"):
        return

    dialect = get_dialect(dialect_name)

    # First matching command wins; dict order defines precedence.
    for cmd_name, cmd_def in dialect["commands"].items():
        m = cmd_def["matcher"].match(line)
        if m:
            _execute_command(cmd_name, m, dialect)
            return

    # Check if this is an end command or comment — the regex might not match
    # partial lines, so also check the print command specially since it's a prefix match
    print_cmd = dialect["commands"].get("print")
    if print_cmd:
        keyword = print_cmd["pattern"]
        if line.startswith(keyword):
            msg = line[len(keyword):].strip()
            print(_parse_visible(msg))
            return
970
+
971
def reset_bio_vars():
    """Forget every declared sequence (used between scripts/tests)."""
    bio_vars.clear()
973
+
974
def execfile(infile, module_name="__lolmain__"):
    "file, module_name -- exec the lolpython file in a newly created module"
    # Accept either an open file-like object or a filename.
    if hasattr(infile, "read"):
        source = infile.read()
    else:
        with open(infile, "r") as handle:
            source = handle.read()
    return execstring(source, module_name)
982
+
983
def execstring(s, module_name="__lolmain__"):
    "s, module_name -- exec the lolpython string in a newly created module"
    python_s = to_python(s)
    # Doing this bit of trickiness so I can have LOLPython code act
    # like __main__. This fix is enough to fool unittest.
    # SECURITY NOTE: exec() runs arbitrary code; only feed trusted input.
    m = types.ModuleType(module_name)
    sys.modules[module_name] = m
    exec(python_s, m.__dict__)
    return m
992
+
993
def convert_file(infile, outfile):
    """Read LOLPython code from *infile* (a file-like object) and write
    the converted Python code to *outfile* (a file-like object or a
    filename).

    Bug fix: when *outfile* was a filename the original opened the file
    and never closed it, leaking the handle (and risking unflushed
    output on non-CPython).  The handle is now closed deterministically.
    """
    python_source = to_python(infile.read())
    if hasattr(outfile, "write"):
        outfile.write(python_source)
    else:
        with open(outfile, "w") as handle:
            handle.write(python_source)
998
+
999
def convert(filenames):
    """Convert LOLPython *filenames* into corresponding Python '.py' files.

    With an empty list, converts stdin to stdout instead.

    Bug fix: the original opened each input and output file and never
    closed either; both are now managed with context managers.
    """
    if not filenames:
        convert_file(sys.stdin, sys.stdout)
        return
    for filename in filenames:
        base, _ext = os.path.splitext(filename)
        with open(filename, "r") as infile, open(base + ".py", "w") as outfile:
            convert_file(infile, outfile)
1007
+
1008
# NOTE: intentionally shadows the builtin help() at module level;
# main() dispatches here for --help.
def help():
    print("""convert and run a lolpython program
Commands are:
  lolpython               Read a lolpython program from stdin and execute it
  lolpython --convert     Convert a lolpython program from stdin
                             and generate python to stdout
  lolpython --convert filename1 [filename....]
                          Convert a list of lolpython files into Python files
  lolpython filename [arg1 [arg2 ...]]
                          Run a lolpython program using optional arguments
""")
1019
+
1020
def main(argv):
    """Command-line entry point: dispatch on --convert/--help/--version,
    otherwise run the named program (or stdin).

    Files ending in ".lolz" are interpreted line-by-line as bioLOLCODE
    commands; anything else is converted to Python and exec'd.
    """
    if len(argv) >= 2:
        if argv[1] == "--convert":
            convert(argv[2:])
            return
        if argv[1] == "--help":
            help()
            return
        if argv[1] == "--version":
            print(__NAME__ + " " + __VERSION__)
            return

        # otherwise, run the lolpython program
        # Shift argv so the script sees itself as argv[0] (via ARGZ).
        sys.argv = sys.argv[1:]
        filename = sys.argv[0]
        if filename.endswith(".lolz"):
            with open(filename, "r") as f:
                for line in f:
                    handle_line(line.strip())
            return
        execfile(filename, "__main__")
    else:
        # commands from stdin
        execfile(sys.stdin)
1044
+
1045
+
1046
+
1047
+ if __name__ == "__main__":
1048
+ main(sys.argv)
1049
+