janus-llm 4.2.0__py3-none-any.whl → 4.3.5__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (134) hide show
  1. janus/__init__.py +1 -1
  2. janus/__main__.py +1 -1
  3. janus/_tests/evaluator_tests/EvalReadMe.md +85 -0
  4. janus/_tests/evaluator_tests/incose_tests/incose_large_test.json +39 -0
  5. janus/_tests/evaluator_tests/incose_tests/incose_small_test.json +17 -0
  6. janus/_tests/evaluator_tests/inline_comment_tests/mumps_inline_comment_test.m +71 -0
  7. janus/_tests/test_cli.py +3 -2
  8. janus/cli/aggregate.py +135 -0
  9. janus/cli/cli.py +111 -0
  10. janus/cli/constants.py +43 -0
  11. janus/cli/database.py +289 -0
  12. janus/cli/diagram.py +178 -0
  13. janus/cli/document.py +174 -0
  14. janus/cli/embedding.py +122 -0
  15. janus/cli/llm.py +187 -0
  16. janus/cli/partition.py +125 -0
  17. janus/cli/self_eval.py +149 -0
  18. janus/cli/translate.py +183 -0
  19. janus/converter/__init__.py +1 -1
  20. janus/converter/_tests/test_translate.py +2 -0
  21. janus/converter/converter.py +129 -92
  22. janus/converter/document.py +21 -14
  23. janus/converter/evaluate.py +237 -4
  24. janus/converter/translate.py +3 -3
  25. janus/embedding/collections.py +1 -1
  26. janus/language/alc/_tests/alc.asm +3779 -0
  27. janus/language/alc/_tests/test_alc.py +1 -1
  28. janus/language/alc/alc.py +9 -4
  29. janus/language/binary/_tests/hello.bin +0 -0
  30. janus/language/block.py +47 -12
  31. janus/language/file.py +1 -1
  32. janus/language/mumps/_tests/mumps.m +235 -0
  33. janus/language/splitter.py +31 -23
  34. janus/language/treesitter/_tests/languages/fortran.f90 +416 -0
  35. janus/language/treesitter/_tests/languages/ibmhlasm.asm +16 -0
  36. janus/language/treesitter/_tests/languages/matlab.m +225 -0
  37. janus/language/treesitter/treesitter.py +9 -1
  38. janus/llm/models_info.py +26 -13
  39. janus/metrics/_tests/asm_test_file.asm +10 -0
  40. janus/metrics/_tests/mumps_test_file.m +6 -0
  41. janus/metrics/_tests/test_treesitter_metrics.py +1 -1
  42. janus/metrics/prompts/clarity.txt +8 -0
  43. janus/metrics/prompts/completeness.txt +16 -0
  44. janus/metrics/prompts/faithfulness.txt +10 -0
  45. janus/metrics/prompts/hallucination.txt +16 -0
  46. janus/metrics/prompts/quality.txt +8 -0
  47. janus/metrics/prompts/readability.txt +16 -0
  48. janus/metrics/prompts/usefulness.txt +16 -0
  49. janus/parsers/code_parser.py +4 -4
  50. janus/parsers/doc_parser.py +12 -9
  51. janus/parsers/eval_parsers/incose_parser.py +134 -0
  52. janus/parsers/eval_parsers/inline_comment_parser.py +112 -0
  53. janus/parsers/parser.py +7 -0
  54. janus/parsers/partition_parser.py +47 -13
  55. janus/parsers/reqs_parser.py +8 -5
  56. janus/parsers/uml.py +5 -4
  57. janus/prompts/prompt.py +2 -2
  58. janus/prompts/templates/README.md +30 -0
  59. janus/prompts/templates/basic_aggregation/human.txt +6 -0
  60. janus/prompts/templates/basic_aggregation/system.txt +1 -0
  61. janus/prompts/templates/basic_refinement/human.txt +14 -0
  62. janus/prompts/templates/basic_refinement/system.txt +1 -0
  63. janus/prompts/templates/diagram/human.txt +9 -0
  64. janus/prompts/templates/diagram/system.txt +1 -0
  65. janus/prompts/templates/diagram_with_documentation/human.txt +15 -0
  66. janus/prompts/templates/diagram_with_documentation/system.txt +1 -0
  67. janus/prompts/templates/document/human.txt +10 -0
  68. janus/prompts/templates/document/system.txt +1 -0
  69. janus/prompts/templates/document_cloze/human.txt +11 -0
  70. janus/prompts/templates/document_cloze/system.txt +1 -0
  71. janus/prompts/templates/document_cloze/variables.json +4 -0
  72. janus/prompts/templates/document_cloze/variables_asm.json +4 -0
  73. janus/prompts/templates/document_inline/human.txt +13 -0
  74. janus/prompts/templates/eval_prompts/incose/human.txt +32 -0
  75. janus/prompts/templates/eval_prompts/incose/system.txt +1 -0
  76. janus/prompts/templates/eval_prompts/incose/variables.json +3 -0
  77. janus/prompts/templates/eval_prompts/inline_comments/human.txt +49 -0
  78. janus/prompts/templates/eval_prompts/inline_comments/system.txt +1 -0
  79. janus/prompts/templates/eval_prompts/inline_comments/variables.json +3 -0
  80. janus/prompts/templates/micromanaged_mumps_v1.0/human.txt +23 -0
  81. janus/prompts/templates/micromanaged_mumps_v1.0/system.txt +3 -0
  82. janus/prompts/templates/micromanaged_mumps_v2.0/human.txt +28 -0
  83. janus/prompts/templates/micromanaged_mumps_v2.0/system.txt +3 -0
  84. janus/prompts/templates/micromanaged_mumps_v2.1/human.txt +29 -0
  85. janus/prompts/templates/micromanaged_mumps_v2.1/system.txt +3 -0
  86. janus/prompts/templates/multidocument/human.txt +15 -0
  87. janus/prompts/templates/multidocument/system.txt +1 -0
  88. janus/prompts/templates/partition/human.txt +22 -0
  89. janus/prompts/templates/partition/system.txt +1 -0
  90. janus/prompts/templates/partition/variables.json +4 -0
  91. janus/prompts/templates/pseudocode/human.txt +7 -0
  92. janus/prompts/templates/pseudocode/system.txt +7 -0
  93. janus/prompts/templates/refinement/fix_exceptions/human.txt +19 -0
  94. janus/prompts/templates/refinement/fix_exceptions/system.txt +1 -0
  95. janus/prompts/templates/refinement/format/code_format/human.txt +12 -0
  96. janus/prompts/templates/refinement/format/code_format/system.txt +1 -0
  97. janus/prompts/templates/refinement/format/requirements_format/human.txt +14 -0
  98. janus/prompts/templates/refinement/format/requirements_format/system.txt +1 -0
  99. janus/prompts/templates/refinement/hallucination/human.txt +13 -0
  100. janus/prompts/templates/refinement/hallucination/system.txt +1 -0
  101. janus/prompts/templates/refinement/reflection/human.txt +15 -0
  102. janus/prompts/templates/refinement/reflection/incose/human.txt +26 -0
  103. janus/prompts/templates/refinement/reflection/incose/system.txt +1 -0
  104. janus/prompts/templates/refinement/reflection/incose_deduplicate/human.txt +16 -0
  105. janus/prompts/templates/refinement/reflection/incose_deduplicate/system.txt +1 -0
  106. janus/prompts/templates/refinement/reflection/system.txt +1 -0
  107. janus/prompts/templates/refinement/revision/human.txt +16 -0
  108. janus/prompts/templates/refinement/revision/incose/human.txt +16 -0
  109. janus/prompts/templates/refinement/revision/incose/system.txt +1 -0
  110. janus/prompts/templates/refinement/revision/incose_deduplicate/human.txt +17 -0
  111. janus/prompts/templates/refinement/revision/incose_deduplicate/system.txt +1 -0
  112. janus/prompts/templates/refinement/revision/system.txt +1 -0
  113. janus/prompts/templates/refinement/uml/alc_fix_variables/human.txt +15 -0
  114. janus/prompts/templates/refinement/uml/alc_fix_variables/system.txt +2 -0
  115. janus/prompts/templates/refinement/uml/fix_connections/human.txt +15 -0
  116. janus/prompts/templates/refinement/uml/fix_connections/system.txt +2 -0
  117. janus/prompts/templates/requirements/human.txt +13 -0
  118. janus/prompts/templates/requirements/system.txt +2 -0
  119. janus/prompts/templates/retrieval/language_docs/human.txt +10 -0
  120. janus/prompts/templates/retrieval/language_docs/system.txt +1 -0
  121. janus/prompts/templates/simple/human.txt +16 -0
  122. janus/prompts/templates/simple/system.txt +3 -0
  123. janus/refiners/format.py +49 -0
  124. janus/refiners/refiner.py +143 -4
  125. janus/utils/enums.py +140 -111
  126. janus/utils/logger.py +2 -0
  127. {janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/METADATA +7 -7
  128. janus_llm-4.3.5.dist-info/RECORD +210 -0
  129. {janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/WHEEL +1 -1
  130. janus_llm-4.3.5.dist-info/entry_points.txt +3 -0
  131. janus/cli.py +0 -1343
  132. janus_llm-4.2.0.dist-info/RECORD +0 -113
  133. janus_llm-4.2.0.dist-info/entry_points.txt +0 -3
  134. {janus_llm-4.2.0.dist-info → janus_llm-4.3.5.dist-info}/LICENSE +0 -0
@@ -9,7 +9,7 @@ from langchain_core.messages import BaseMessage
9
9
  from langchain_core.pydantic_v1 import BaseModel, Field
10
10
 
11
11
  from janus.language.block import CodeBlock
12
- from janus.parsers.parser import JanusParser
12
+ from janus.parsers.parser import JanusParser, JanusParserException
13
13
  from janus.utils.logger import create_logger
14
14
 
15
15
  log = create_logger(__name__)
@@ -36,6 +36,29 @@ class PartitionList(BaseModel):
36
36
  )
37
37
 
38
38
 
39
+ # The following IDs appear in the prompt example. If the LLM produces them,
40
+ # they should be ignored
41
+ EXAMPLE_IDS = {
42
+ "0d2f4f8d",
43
+ "def2a953",
44
+ "75315253",
45
+ "e7f928da",
46
+ "1781b2a9",
47
+ "2fe21e27",
48
+ "9aef6179",
49
+ "6061bd82",
50
+ "22bd0c30",
51
+ "5d85e19e",
52
+ "06027969",
53
+ "91b722fb",
54
+ "4b3f79be",
55
+ "k57w964a",
56
+ "51638s96",
57
+ "065o6q32",
58
+ "j5q6p852",
59
+ }
60
+
61
+
39
62
  class PartitionParser(JanusParser, PydanticOutputParser):
40
63
  token_limit: int
41
64
  model: BaseLanguageModel
@@ -59,7 +82,10 @@ class PartitionParser(JanusParser, PydanticOutputParser):
59
82
  # Generate a unique ID for each line (ensure they are unique)
60
83
  line_ids = set()
61
84
  while len(line_ids) < len(self.lines):
62
- line_ids.add(str(uuid.UUID(int=RNG.getrandbits(128), version=4))[:8])
85
+ line_id = str(uuid.UUID(int=RNG.getrandbits(128), version=4))[:8]
86
+ if line_id in EXAMPLE_IDS:
87
+ continue
88
+ line_ids.add(line_id)
63
89
 
64
90
  # Prepend each line with the corresponding ID, save the mapping
65
91
  self.line_id_to_index = {lid: i for i, lid in enumerate(line_ids)}
@@ -71,6 +97,11 @@ class PartitionParser(JanusParser, PydanticOutputParser):
71
97
  def parse(self, text: str | BaseMessage) -> str:
72
98
  if isinstance(text, BaseMessage):
73
99
  text = str(text.content)
100
+ original_text = text
101
+
102
+ # Strip everything outside the JSON object
103
+ begin, end = text.find("["), text.rfind("]")
104
+ text = text[begin : end + 1]
74
105
 
75
106
  try:
76
107
  out: PartitionList = super().parse(text)
@@ -78,26 +109,28 @@ class PartitionParser(JanusParser, PydanticOutputParser):
78
109
  log.debug(f"Invalid JSON object. Output:\n{text}")
79
110
  raise
80
111
 
112
+ # Get partition locations, discard reasoning
113
+ partition_locations = {partition.location for partition in out.__root__}
114
+
115
+ # Ignore IDs from the example input
116
+ partition_locations.difference_update(EXAMPLE_IDS)
117
+
81
118
  # Locate any invalid line IDs, raise exception if any found
82
- invalid_splits = [
83
- partition.location
84
- for partition in out.__root__
85
- if partition.location not in self.line_id_to_index
86
- ]
119
+ invalid_splits = partition_locations.difference(self.line_id_to_index)
87
120
  if invalid_splits:
88
121
  err_msg = (
89
122
  f"{len(invalid_splits)} line ID(s) not found in input: "
90
123
  + ", ".join(invalid_splits)
91
124
  )
92
125
  log.warning(err_msg)
93
- raise OutputParserException(err_msg)
126
+ raise JanusParserException(original_text, err_msg)
94
127
 
95
128
  # Map line IDs to indices (so they can be sorted and lines indexed)
96
129
  index_to_line_id = {0: "START", None: "END"}
97
130
  split_points = {0}
98
- for partition in out.__root__:
99
- index = self.line_id_to_index[partition.location]
100
- index_to_line_id[index] = partition.location
131
+ for partition in partition_locations:
132
+ index = self.line_id_to_index[partition]
133
+ index_to_line_id[index] = partition
101
134
  split_points.add(index)
102
135
 
103
136
  # Get partition start/ends, chunks, chunk lengths
@@ -128,9 +161,10 @@ class PartitionParser(JanusParser, PydanticOutputParser):
128
161
  "Oversized chunks:\n"
129
162
  + "\n#############\n".join(chunk for _, chunk, _ in data)
130
163
  )
131
- raise OutputParserException(
164
+ raise JanusParserException(
165
+ original_text,
132
166
  f"The following segments are too long and must be "
133
- f"further subdivided:\n{problem_points}"
167
+ f"further subdivided:\n{problem_points}",
134
168
  )
135
169
 
136
170
  return "\n<JANUS_PARTITION>\n".join(chunks)
@@ -2,10 +2,9 @@ import json
2
2
  import re
3
3
 
4
4
  from langchain.output_parsers.json import parse_json_markdown
5
- from langchain_core.exceptions import OutputParserException
6
5
  from langchain_core.messages import BaseMessage
7
6
 
8
- from janus.parsers.parser import JanusParser
7
+ from janus.parsers.parser import JanusParser, JanusParserException
9
8
  from janus.utils.logger import create_logger
10
9
 
11
10
  log = create_logger(__name__)
@@ -20,6 +19,7 @@ class RequirementsParser(JanusParser):
20
19
  def parse(self, text: str | BaseMessage) -> str:
21
20
  if isinstance(text, BaseMessage):
22
21
  text = str(text.content)
22
+ original_text = text
23
23
 
24
24
  # TODO: This is an incorrect implementation (lstrip and rstrip take character
25
25
  # lists and strip any instances of those characters, not the full str)
@@ -30,11 +30,14 @@ class RequirementsParser(JanusParser):
30
30
  obj = parse_json_markdown(text)
31
31
  except json.JSONDecodeError as e:
32
32
  log.debug(f"Invalid JSON object. Output:\n{text}")
33
- raise OutputParserException(f"Got invalid JSON object. Error: {e}")
33
+ raise JanusParserException(
34
+ original_text, f"Got invalid JSON object. Error: {e}"
35
+ )
34
36
 
35
37
  if not isinstance(obj, dict):
36
- raise OutputParserException(
37
- f"Got invalid return object. Expected a dictionary, but got {type(obj)}"
38
+ raise JanusParserException(
39
+ original_text,
40
+ f"Got invalid return object. Expected a dictionary, but got {type(obj)}",
38
41
  )
39
42
  return json.dumps(obj)
40
43
 
janus/parsers/uml.py CHANGED
@@ -3,10 +3,10 @@ import subprocess # nosec
3
3
  from pathlib import Path
4
4
  from tempfile import NamedTemporaryFile
5
5
 
6
- from langchain_core.exceptions import OutputParserException
7
6
  from langchain_core.messages import BaseMessage
8
7
 
9
8
  from janus.parsers.code_parser import CodeParser
9
+ from janus.parsers.parser import JanusParserException
10
10
  from janus.utils.logger import create_logger
11
11
 
12
12
  log = create_logger(__name__)
@@ -14,6 +14,7 @@ log = create_logger(__name__)
14
14
 
15
15
  class UMLSyntaxParser(CodeParser):
16
16
  def _check_plantuml(self, text: str) -> None:
17
+ original_text = text
17
18
  # Leading newlines can break the parser, remove them
18
19
  text = text.replace("\\n", "\n").strip()
19
20
 
@@ -43,7 +44,7 @@ class UMLSyntaxParser(CodeParser):
43
44
  log.error(err_txt)
44
45
  raise Exception(err_txt)
45
46
 
46
- # Check for bad outputs, raise OutputParserExceptions if so
47
+ # Check for bad outputs, raise JanusParserExceptions if so
47
48
  if "Error" in stderr or "Error" in stdout:
48
49
  err_txt = "Recieved UML parsing error(s)."
49
50
 
@@ -64,7 +65,7 @@ class UMLSyntaxParser(CodeParser):
64
65
  err_txt += f"\nError located at line {i} must be fixed:\n"
65
66
  err_txt += "\n".join(err_lines)
66
67
  log.warning(err_txt)
67
- raise OutputParserException(err_txt)
68
+ raise JanusParserException(original_text, err_txt)
68
69
 
69
70
  if "Warning" in stdout or "Warning" in stderr:
70
71
  err_txt = "Recieved UML parsing warning (often due to missing PLANTUML)."
@@ -74,7 +75,7 @@ class UMLSyntaxParser(CodeParser):
74
75
  err_txt += f"\nSTDOUT:\n```\n{stdout.strip()}\n```\n"
75
76
 
76
77
  log.warning(err_txt)
77
- raise OutputParserException(err_txt)
78
+ raise JanusParserException(original_text, err_txt)
78
79
 
79
80
  def _get_error_lines(self, s: str) -> list[int]:
80
81
  return [int(x.group(1)) for x in re.finditer(r"Error line (\d+) in file:", s)]
janus/prompts/prompt.py CHANGED
@@ -23,7 +23,7 @@ TEXT_OUTPUT = []
23
23
  # same language as the input, regardless of the `output-lang` argument.
24
24
  SAME_OUTPUT = ["document_inline"]
25
25
 
26
- JSON_OUTPUT = ["evaluate", "document", "document_madlibs", "requirements"]
26
+ JSON_OUTPUT = ["evaluate", "document", "document_cloze", "requirements"]
27
27
 
28
28
  # Directory containing Janus prompt template directories and files
29
29
  JANUS_PROMPT_TEMPLATES_DIR = Path(__file__).parent / "templates"
@@ -109,7 +109,7 @@ class PromptEngine(ABC):
109
109
  source_language = source_language.lower()
110
110
  self.variables = dict(
111
111
  SOURCE_LANGUAGE=source_language,
112
- FILE_SUFFIX=LANGUAGES[source_language]["suffix"],
112
+ FILE_SUFFIX=LANGUAGES[source_language]["suffixes"],
113
113
  SOURCE_CODE_EXAMPLE=LANGUAGES[source_language]["example"],
114
114
  )
115
115
  if target_language is not None:
@@ -0,0 +1,30 @@
1
+ # Prompt Template Files
2
+
3
+ Janus supports defining custom prompts in text files.
4
+
5
+ ```
6
+ directory_name/
7
+ system.txt
8
+ human.txt
9
+ variables.json (optional)
10
+ ```
11
+
12
+ ## Prompt templates
13
+ - `system.txt` contains text representing the system prompt template.
14
+ + Ex. "Your purpose is to understand {SOURCE_LANGUAGE} code."
15
+ - `human.txt` contains text representing the human prompt template.
16
+ + Ex. "Summarize the contents of the following {SOURCE_LANGUAGE} code in {written_language} sentences."
17
+
18
+ Both prompt templates can make use of f-string-style arguments, i.e. `{VARIABLE}`. Multiple lines are supported.
19
+
20
+ To reuse a prompt, say for the same system directive with differing output styles, create a symbolic link to the original file. For example:
21
+ `ln -s ../document/system.txt system.txt`
22
+
23
+ ## Variables
24
+ - (Optional) `variables.json` contains a JSON object representing additional variables and their values used in the templates above, beyond what is provided to Janus via command-line arguments.
25
+ + Ex.
26
+ ```
27
+ {
28
+ "written_language": "Spanish"
29
+ }
30
+ ```
@@ -0,0 +1,6 @@
1
+ Combine the following documentation for a program in {SOURCE_LANGUAGE} code into a single description.
2
+ Make sure to put the resultant description within triple backticks.
3
+ Here are the representations:
4
+ ```
5
+ {SOURCE_CODE}
6
+ ```
@@ -0,0 +1 @@
1
+ You are a senior software engineer named John and tasked with combining representations of code into a single representation.
@@ -0,0 +1,14 @@
1
+ Please fix the following output generated by a large language model.
2
+ Provide your corrected output in the same format as the original.
3
+ The large language model was given the following prompt in triple backticks:
4
+ ```
5
+ {ORIGINAL_PROMPT}
6
+ ```
7
+ and produced the following output:
8
+ ```
9
+ {OUTPUT}
10
+ ```
11
+ but received the following errors:
12
+ ```
13
+ {ERRORS}
14
+ ```
@@ -0,0 +1 @@
1
+ You are a senior software engineer named John and tasked with fixing the output created by a large language model.
@@ -0,0 +1,9 @@
1
+ Generate a UML {DIAGRAM_TYPE} diagram using PLANTUML syntax that improves the readability of the following {SOURCE_LANGUAGE} code for a programmer.
2
+ In your output, make sure to reformat any {SOURCE_LANGUAGE} code that would break PLANTUML syntax rules.
3
+ Do not output any {SOURCE_LANGUAGE} code in the diagram.
4
+ Make sure to capture all relevant syntax, functions and branching in the source code.
5
+ Make sure to document all functions in the code
6
+ Here is the source code:
7
+ ```
8
+ {SOURCE_CODE}
9
+ ```
@@ -0,0 +1 @@
1
+ You are a senior software engineer named John and tasked with creating PLANTUML documentation of {SOURCE_LANGUAGE} code.
@@ -0,0 +1,15 @@
1
+ Generate a UML {DIAGRAM_TYPE} diagram using PLANTUML syntax that improves the readability of the following {SOURCE_LANGUAGE} code for a programmer.
2
+ You are also provided with documentation for this code.
3
+ In your output, make sure to reformat any {SOURCE_LANGUAGE} code that would break PLANTUML syntax rules.
4
+ Do not output any {SOURCE_LANGUAGE} code in the diagram.
5
+ Make sure to capture all relevant syntax, functions and branching in the source code.
6
+ Make sure to document all functions in the code
7
+ Make sure to put the resultant PLANTUML code within triple backticks.
8
+ Here is the code documentation:
9
+ ```
10
+ {DOCUMENTATION}
11
+ ```
12
+ Here is the source code:
13
+ ```
14
+ {SOURCE_CODE}
15
+ ```
@@ -0,0 +1 @@
1
+ You are a senior software engineer named John and tasked with creating PLANTUML documentation of {SOURCE_LANGUAGE} code.
@@ -0,0 +1,10 @@
1
+ Please explain the {SOURCE_LANGUAGE} code section below. Your response should be in plain text with no delimiters. It should contain a natural language description of the code's intended functionality; do not describe the execution step-by-step, simply explain the overall purpose. This description should be roughly one paragraph in length; multiple paragraphs may be used if and only if the code is particularly complex or has multiple independent functions.
2
+ After this description, describe the expected initial state and/or inputs, the expected terminal state and/or outputs, and any potential exceptions that might arise in the code's execution.
3
+
4
+ It is vital that you do not include any other context, questions, or text of any kind, other than the documentation for this piece of code. You should include all of the fields described above, and those fields only.
5
+
6
+ Here is the code:
7
+
8
+ ```
9
+ {SOURCE_CODE}
10
+ ```
@@ -0,0 +1 @@
1
+ You are a senior software engineer tasked with documenting {SOURCE_LANGUAGE} code.
@@ -0,0 +1,11 @@
1
+ The {SOURCE_LANGUAGE} code provided below has had its comments replaced by either `<INLINE_COMMENT #>` (for single-line comments) or `<BLOCK_COMMENT #>` (for multiple consecutive lines of comments), where `#` takes the place of an 8-character alphanumeric ID. You are to write replacement comments based on the source code.
2
+
3
+ Return a JSON-formatted string where the keys are the alphanumeric IDs and the values are the comments that should be inserted in the code. Be sure to include comments for all placeholders present in the input. Do not provide any other commentary, do not write any code or additional comments.
4
+
5
+ Example input: ```{EXAMPLE_INPUT}```
6
+ Example output: ```{EXAMPLE_OUTPUT}```
7
+
8
+ Please provide comments for the following code:
9
+ ```
10
+ {SOURCE_CODE}
11
+ ```
@@ -0,0 +1 @@
1
+ You are a senior software engineer tasked with documenting {SOURCE_LANGUAGE} code.
@@ -0,0 +1,4 @@
1
+ {
2
+ "EXAMPLE_INPUT": " I '$D(CATLIST) Q\n ; <INLINE_COMMENT 14c59f05>\n S CAT=\"\"\n F S CAT=$O(CATLIST(CAT)) Q:CAT=\"\" D\n . S LDATE=$O(CATLIST(CAT,\"\"),-1)\n .; <INLINE_COMMENT 97a39adf>\n . S DATE=\"\"\n . F S DATE=$O(CATLIST(CAT,DATE)) Q:DATE=LDATE D\n .. S WCR=\"\"\n .. F S WCR=$O(CATLIST(CAT,DATE,WCR)) Q:WCR=\"\" D\n ... S FI=\"\"\n ... F S FI=$O(CATLIST(CAT,DATE,WCR,FI)) Q:FI=\"\" D\n .... S FIEVAL(FI)=0\n ....; <INLINE_COMMENT 4fd34837>\n .... S IND=0\n .... F S IND=+$O(FIEVAL(FI,IND)) Q:IND=0 S FIEVAL(FI,IND)=0\n .; <BLOCK_COMMENT 997ec49a>\n . S (NTRUE,WCR)=0\n . F S WCR=$O(CATLIST(CAT,LDATE,WCR)) Q:WCR=\"\" D\n .. S FI=\"\"\n .. F S FI=$O(CATLIST(CAT,LDATE,WCR,FI)) Q:FI=\"\" D\n ... I NTRUE=0 D Q\n ....; <INLINE_COMMENT 3ac32fb5>\n .... S (IND,NTRUE)=1\n .... F S IND=+$O(FIEVAL(FI,IND)) Q:IND=0 S FIEVAL(FI,IND)=0\n ... S FIEVAL(FI)=0\n ...; <INLINE_COMMENT d04a8fdf>\n ... S IND=0\n ... F S IND=+$O(FIEVAL(FI,IND)) Q:IND=0 S FIEVAL(FI,IND)=0\n Q\n",
3
+ "EXAMPLE_OUTPUT": "{\n \"14c59f05\": \";Only the most recent HF in a category can be true.\",\n \"97a39adf\": \";For each category set all but the most recent HF false.\",\n \"4fd34837\": \";If there are multiple occurrences set them all false.\",\n \"997ec49a\": \" .;\\n .;If there is more than on HF on the most recent date then only the\\n .;one with the highest WCR can be true. The highest possible WCR is 1.\\n .;Set all with lower WCRs false.\\n .;If the most recent health factor has multiple occurrences only\\n .;the first occurrence can be true.\",\n \"3ac32fb5\": \";If there are multiple sub-occurrences set them all false.\"\n}"
4
+ }
@@ -0,0 +1,4 @@
1
+ {
2
+ "EXAMPLE_INPUT": "***********************************************************************\n* <BLOCK_COMMENT d8453f99>\n***********************************************************************\nZUIDSTCK AMODE 31\nZUIDSTCK RMODE 31\nZUIDSTCK CSECT\n STM R14,R12,12(R13) <INLINE_COMMENT b2cc1643>\n L R1,0(R1) <INLINE_COMMENT a315e5ca>\n USING DSA,R1 <INLINE_COMMENT 3155f463>\n STCKE EISTOD <INLINE_COMMENT d84b5ebf>\n\n LM R14,R12,12(R13) <INLINE_COMMENT a79a8e65>\n XR R15,R15 <INLINE_COMMENT 47ad1b4d>\n BR R14 <INLINE_COMMENT eb971719>\n",
3
+ "EXAMPLE_OUTPUT": "{\n \"d8453f99\": \"Control Section\",\n \"b2cc1643\": \"Save registers\",\n \"a315e5ca\": \"Load parameter address\",\n \"3155f463\": \"... tell assembler\",\n \"d84b5ebf\": \"Save STCKE TOD\",\n \"a79a8e65\": \"Load Registers\",\n \"47ad1b4d\": \"Clear R15 (RC)\",\n \"eb971719\": \"Return to calling program\"\n}"
4
+ }
@@ -0,0 +1,13 @@
1
+ Please add inline comments to the {SOURCE_LANGUAGE} code
2
+ provided below in triple backticks.
3
+
4
+ ```
5
+ {SOURCE_CODE}
6
+ ```
7
+
8
+ Keep all source code in the output.
9
+
10
+ Please add a comment at the top of the file which summarizes
11
+ the purpose of the code.
12
+
13
+ Please add comments to functions which summarize their functionality.
@@ -0,0 +1,32 @@
1
+ Below is a snippet of {SOURCE_LANGUAGE} code to use as reference for the following task:
2
+ ```
3
+ {SOURCE_CODE}
4
+ ```
5
+
6
+ Given the above code and the list of requirements I will soon supply, please evaluate each requirement individually based on the following criteria:
7
+
8
+ C1 - Necessary: The need or requirement statement defines an essential capability, characteristic, constraint, or quality factor needed to satisfy a lifecycle concept, need, source, or parent requirement.
9
+ C2 - Appropriate: The specific intent and amount of detail of the need or requirement statement is appropriate to the level (the level of abstraction, organization, or system architecture) of the entity to which it refers.
10
+ C3 - Unambiguous: Need statements must be written such that the stakeholder intent is clear. Requirement statements must be stated such that the requirement can be interpreted in only one way by all the intended stakeholders.
11
+ C4 - Complete: The requirement statement sufficiently describes the necessary capability, characteristic, constraint, or quality factor to meet the need, source, or parent requirement from which it was transformed without needing other information to understand the requirement.
12
+ C5 - Singular: The stakeholder need or requirement statement should state a single capability, characteristic, constraint, or quality factor.
13
+ C6 - Feasible: The need or requirement can be realized within entity constraints (for example: cost, schedule, technical, legal, ethical, safety) with acceptable risk.
14
+ C7 - Verifiable: The requirement statement is structured and worded such that its realization can be verified to the approving authority’s satisfaction.
15
+ C8 - Correct: The need statement must be an accurate representation of the lifecycle concept or source from which it was transformed. The requirement statement must be an accurate representation of the need, source, or parent requirement from which it was transformed.
16
+ C9 - Conforming: Individual needs and requirements should conform to an approved standard pattern and style guide or standard for writing and managing needs and requirements.
17
+
18
+ For each and every requirement below, you must indicate whether they "pass" or "fail" each of the above criteria. Briefly explain your reasoning before providing each pass/fail.
19
+
20
+ Your response should be formatted as a list of JSON objects, with each object corresponding to one requirement. Each object should include 10 keys: `requirement_id`, `C1`, `C2`, ..., `C9`. `requirement_id` should have a string value that holds the 8-character UUID associated with the requirement. The other nine values should each be a JSON object with two keys: `reasoning` (a clear explanation of why the criterion is passed or failed) and a `score` (the literal string "pass" or "fail").
21
+
22
+ Be discerning in your evaluation; only very high-quality requirements should pass all criteria. Be a hard grader. If a requirement fails a criterion, be thorough and detailed in your explanation of why.
23
+
24
+ Below is an example output for a snippet of code with two labeled requirements:
25
+ ```
26
+ {EXAMPLE_OUTPUT}
27
+ ```
28
+
29
+ Here are the requirements that you are to evaluate:
30
+ {REQUIREMENTS}
31
+
32
+ Don't forget to include your final scores in JSON format!
@@ -0,0 +1 @@
1
+ You are a software quality engineer; your job is to evaluate requirements according to a rubric.
@@ -0,0 +1,3 @@
1
+ {
2
+ "EXAMPLE_OUTPUT": "[\n {\n \"requirement_id\": \"c3caa172\",\n \"requirement\": \"The UserID field must be followed by a comma (,) as a field separator.\",\n \"C1\": {\n \"reasoning\": \"This defines an essential characteristic of the data structure.\",\n \"score\": \"pass\"\n },\n \"C2\": {\n \"reasoning\": \"The detail provided is appropriate for the software's data structure level.\",\n \"score\": \"pass\"\n },\n \"C3\": {\n \"reasoning\": \"The statement is clear and unambiguous.\",\n \"score\": \"pass\"\n },\n \"C4\": {\n \"reasoning\": \"The requirement sufficiently describes the separator without needing additional information.\",\n \"score\": \"pass\"\n },\n \"C5\": {\n \"reasoning\": \"The requirement states a single characteristic.\",\n \"score\": \"pass\"\n },\n \"C6\": {\n \"reasoning\": \"Including a field separator is feasible.\",\n \"score\": \"pass\"\n },\n \"C7\": {\n \"reasoning\": \"The requirement can be verified by checking the data structure.\",\n \"score\": \"pass\"\n },\n \"C8\": {\n \"reasoning\": \"The requirement accurately represents the need for a separator.\",\n \"score\": \"pass\"\n },\n \"C9\": {\n \"reasoning\": \"The requirement conforms to standard pattern and style.\",\n \"score\": \"pass\"\n }\n },\n {\n \"requirement_id\": \"fab48ab9\",\n \"requirement\": \"The software must handle web communication parameters, including buffer addresses and lengths for web receive operations, query string management, and basic and query mode service program identifiers.\",\n \"C1\": {\n \"reasoning\": \"Defines essential capabilities for handling web communication parameters.\",\n \"score\": \"pass\"\n },\n \"C2\": {\n \"reasoning\": \"Appropriate detail for software handling web communication.\",\n \"score\": \"pass\"\n },\n \"C3\": {\n \"reasoning\": \"Clear and unambiguous about what the software must handle.\",\n \"score\": \"pass\"\n },\n \"C4\": {\n \"reasoning\": \"Sufficiently describes the necessary capabilities without needing 
additional information.\",\n \"score\": \"pass\"\n },\n \"C5\": {\n \"reasoning\": \"States multiple capabilities related to web communication parameters.\",\n \"score\": \"fail\"\n },\n \"C6\": {\n \"reasoning\": \"Feasible to implement within typical software constraints.\",\n \"score\": \"pass\"\n },\n \"C7\": {\n \"reasoning\": \"Verification can be done through testing the software's handling of these parameters.\",\n \"score\": \"pass\"\n },\n \"C8\": {\n \"reasoning\": \"Accurately represents the need for handling web communication parameters.\",\n \"score\": \"pass\"\n },\n \"C9\": {\n \"reasoning\": \"Conforms to standard requirement style and structure.\",\n \"score\": \"pass\"\n }\n }\n]"
3
+ }
@@ -0,0 +1,49 @@
1
+ Please evaluate each comment in the provided {SOURCE_LANGUAGE} code based on the following criteria:
2
+
3
+ Completeness - Does the comment address all capabilities of the relevant source code?
4
+
5
+ 4 - All essential functionality is documented.
6
+ 3 - Most essential functionality is documented.
7
+ 2 - Little essential functionality is documented.
8
+ 1 - No essential functionality is documented.
9
+
10
+
11
+ Hallucination - Does the comment provide true information?
12
+
13
+ 4 - The comment provides only true information.
14
+ 3 - The comment provides mostly true information.
15
+ 2 - The comment provides mostly untrue information.
16
+ 1 - The comment is completely untrue.
17
+
18
+
19
+ Readability - Is the comment clear to read?
20
+
21
+ 4 - The comment is well-written.
22
+ 3 - The comment has few problems.
23
+ 2 - The comment has many problems.
24
+ 1 - The comment is unreadable.
25
+
26
+ Usefulness - Is the comment useful?
27
+
28
+ 4 - The comment helps an expert programmer understand the code better.
29
+ 3 - The comment helps an average programmer understand the code better.
30
+ 2 - The comment documents only trivial functionality.
31
+ 1 - The comment is not useful at any level.
32
+
33
+
34
+ Look through the code and find each individual comment; they will be delineated by <BLOCK_COMMENT id> or <INLINE_COMMENT id> where "id" is an 8-character UUID for the comment that follows.
35
+
36
+ Each comment should be evaluated independently based on the above criteria. Your response should be formatted as a list of JSON objects, with each object corresponding to one comment. Each object should include five keys: `comment_id`, `completeness`, `hallucination`, `readability`, and `usefulness`. `comment_id` should have a string value that holds the 8-character UUID associated with the comment. The other four values should each be a JSON object with two keys: `reasoning` (a clear explanation of why the criterion is rated the way it is) and `score` (an integer rating from 1 to 4).
37
+
38
+ Be discerning in your evaluation; only very high-quality comments should get top marks. Be a hard grader. If a comment is rated low, be thorough and detailed in your explanation of your score.
39
+
40
+ Below is an example output for a snippet of code with three labeled comments:
41
+ ```{EXAMPLE_OUTPUT}```
42
+
43
+
44
+ Evaluate the following code:
45
+ ```
46
+ {SOURCE_CODE}
47
+ ```
48
+
49
+ Don't forget to include your final scores in JSON format!
@@ -0,0 +1 @@
1
+ You are a software quality engineer; your job is to evaluate comments in code according to a rubric.
@@ -0,0 +1,3 @@
1
+ {
2
+ "EXAMPLE_OUTPUT": "[\n {\n \"comment_id\": \"abcd1234\",\n \"completeness\": {\n \"reasoning\": \"The comment completely describes the functionality of the block\",\n \"score\": 4\n },\n \"hallucination\": {\n \"reasoning\": \"The comment misrepresents the behavior of for-loops in this language, and as a result is entirely wrong about what this code does\",\n \"score\": 1\n },\n \"readability\": {\n \"reasoning\": \"The comment is easy to read and understand\",\n \"score\": 4\n },\n \"usefulness\": {\n \"reasoning\": \"The comment would be necessary to understand this block (if the comment were correct)\",\n \"score\": 3\n }\n },\n {\n \"comment_id\": \"5678efgh\",\n \"completeness\": {\n \"reasoning\": \"There is no explanation of the GR_lby variable\",\n \"score\": 2\n },\n \"hallucination\": {\n \"reasoning\": \"The comment is accurate\",\n \"score\": 4\n },\n \"readability\": {\n \"reasoning\": \"The comment is wordy but generally understandable\",\n \"score\": 3\n },\n \"usefulness\": {\n \"reasoning\": \"The comment didn't explain anything that isn't already obvious from the code\",\n \"score\": 1\n }\n },\n {\n \"comment_id\": \"00aa22bb\",\n \"completeness\": {\n \"reasoning\": \"The comment describes the functionality of the line, but not an explanation/origin of the hard-coded integer\",\n \"score\": 3\n },\n \"hallucination\": {\n \"reasoning\": \"The comment mentions a function that does not appear to exist in the file\",\n \"score\": 2\n },\n \"readability\": {\n \"reasoning\": \"The comment was difficult to follow, and formatted poorly\",\n \"score\": 1\n },\n \"usefulness\": {\n \"reasoning\": \"This line would be completely incomprehensible without this comment\",\n \"score\": 4\n }\n }\n]"
3
+ }
@@ -0,0 +1,23 @@
1
+ Adhere to the following rules for translating MUMPS to Python:
2
+
3
+ 1. Routines from other files
4
+ When a function from another file is invoked, treat the file like a module. Keep all imports at the beginning of the returned code.
5
+
6
+ 2. Naming Conventions
7
+ Adhere to PEP8 for variable and function names. Improve readability when possible, making use of context and documentation. For example, a MUMPS variable like `RXQTY` might be translated to `prescription_quantity`.
8
+
9
+ 3. Ignore K(ill) Commands
10
+ Memory allocation and garbage collection is generally handled automatically in Python, so any MUMPS K(ill) commands should be ignored.
11
+
12
+ 4. Arrays
13
+ MUMPS arrays should generally be treated as nested dictionaries.
14
+
15
+ 5. Global Variables
16
+ When globals (prepended by a circumflex) are used in a routine, treat them as coming from a mysql database. Assume that database credentials are stored in environment variables (`SQL_HOST`, `SQL_USER`, `SQL_PWD`, `SQL_DB`).
17
+
18
+
19
+ Please convert the following MUMPS .m code found in between triple backticks into {TARGET_LANGUAGE} code. The returned code should also be delimited with triple backticks.
20
+
21
+ ```
22
+ {SOURCE_CODE}
23
+ ```
@@ -0,0 +1,3 @@
1
+ Your purpose is to convert MUMPS .m code
2
+ into runnable {TARGET_LANGUAGE} code ({TARGET_LANGUAGE} version
3
+ {TARGET_LANGUAGE_VERSION}).
@@ -0,0 +1,28 @@
1
+ Adhere to the following rules for translating MUMPS to Python:
2
+
3
+ 1. Routines from other files
4
+ When a function from another file is invoked (e.g. `D FUNC^ABC`), treat the file like a module (e.g. `from abc import func`, `func()` ). Keep all imports at the beginning of the returned code.
5
+
6
+ 2. Naming Conventions
7
+ Adhere to PEP8 for variable and function names. Improve readability when possible, making use of context and documentation. For example, a MUMPS variable like `RXQTY` might be translated to `prescription_quantity`.
8
+
9
+ 3. Ignore K(ill) Commands
10
+ Memory allocation and garbage collection is generally handled automatically in Python, so any MUMPS K(ill) commands should be ignored.
11
+
12
+ 4. Arrays
13
+ MUMPS arrays should generally be treated as nested dictionaries.
14
+
15
+ 5. Global Variables
16
+ When globals (prepended by a circumflex) are used in a routine, treat them as coming from a mysql database. Assume that database credentials are stored in environment variables (`SQL_HOST`, `SQL_USER`, `SQL_PWD`, `SQL_DB`).
17
+
18
+ 6. Local Variables
19
+ In MUMPS, even "local" variables are accessible from any subroutine. If a variable would not be defined, declare it as global.
20
+
21
+ 7. Translate Everything
22
+ Translate ALL the given code to the best of your ability.
23
+
24
+ Please convert the following MUMPS .m code found in between triple backticks into {TARGET_LANGUAGE} code. The returned code should also be delimited with triple backticks.
25
+
26
+ ```
27
+ {SOURCE_CODE}
28
+ ```
@@ -0,0 +1,3 @@
1
+ Your purpose is to convert MUMPS .m code
2
+ into runnable {TARGET_LANGUAGE} code ({TARGET_LANGUAGE} version
3
+ {TARGET_LANGUAGE_VERSION}).
@@ -0,0 +1,29 @@
1
+ Adhere to the following rules for translating MUMPS to Python:
2
+
3
+ 1. Routines from other files
4
+ When a function from another file is invoked (e.g. `D FUNC^ABC`), treat the file like a module (e.g. `from abc import func`, `func()` ). Keep all imports at the beginning of the returned code.
5
+
6
+ 2. Naming Conventions
7
+ Adhere to PEP8 for variable and function names. Improve readability when possible, making use of context and documentation. For example, a MUMPS variable like `RXQTY` might be translated to `prescription_quantity`.
8
+
9
+ 3. Ignore K(ill) Commands
10
+ Memory allocation and garbage collection is generally handled automatically in Python, so any MUMPS K(ill) commands should be ignored.
11
+
12
+ 4. Arrays
13
+ MUMPS arrays should be treated as nested dictionaries.
14
+
15
+ 5. Global Variables
16
+ When globals (prepended by a circumflex) are used in a routine, treat them as coming from a mysql database. Assume that database credentials are stored in environment variables (`SQL_HOST`, `SQL_USER`, `SQL_PWD`, `SQL_DB`).
17
+
18
+ 6. Local Variables
19
+ In MUMPS, even "local" variables are accessible from any subroutine. Declare all variables used in any function as global at the beginning of the function.
20
+
21
+ 7. Translate Everything
22
+ Translate ALL the given code to the best of your ability. DO NOT use pseudocode. DO NOT leave functions empty. DO NOT give up on translation.
23
+
24
+
25
+ Please convert the following MUMPS .m code found in between triple backticks into {TARGET_LANGUAGE} code. The returned code should also be delimited with triple backticks.
26
+
27
+ ```
28
+ {SOURCE_CODE}
29
+ ```
@@ -0,0 +1,3 @@
1
+ Your purpose is to convert MUMPS .m code
2
+ into runnable {TARGET_LANGUAGE} code ({TARGET_LANGUAGE} version
3
+ {TARGET_LANGUAGE_VERSION}).
@@ -0,0 +1,15 @@
1
+ Please document the {SOURCE_LANGUAGE} function or module below. Your response should be in JSON format, and include three string fields:
2
+
3
+ docstring: A Sphinx-style docstring for the code, including a summary of its functionality; the name, type, and description of any parameters or returns; and any potential exceptions that might arise in its execution. This should be a string value, NOT a nested JSON object.
4
+ example_usage: A well-commented minimal example in {SOURCE_LANGUAGE} utilizing the given code's functionality.
5
+ pseudocode: A Python-style pseudocode implementation of the module or function's behavior.
6
+
7
+ If no executable code is provided (for example, if the input is a simple label with no logic attached), return an empty string for each of the above fields.
8
+
9
+ It is vital that you do not include any other context, questions, or text of any kind, other than the documentation for this piece of code. You should include all of the fields described above, and those fields only.
10
+
11
+ Here is the code:
12
+
13
+ ```
14
+ {SOURCE_CODE}
15
+ ```
@@ -0,0 +1 @@
1
+ You are a senior software engineer tasked with documenting {SOURCE_LANGUAGE} code.
@@ -0,0 +1,22 @@
1
+ Partition the {SOURCE_LANGUAGE} code into logical blocks. Each block should be relatively self-contained and ideally constitute a complete "subroutine", including any associated comments. These breakpoints should usually be inserted between labeled blocks, but perhaps not between *every* labeled block (depending on things like fallthrough).
2
+
3
+ INPUT FORMAT:
4
+ Each line of code has been prepended with an 8-character unique ID. A Python example would look like this:
5
+ ```
6
+ {EXAMPLE_INPUT}
7
+ ```
8
+
9
+ And your output might look like this:
10
+ ```
11
+ {EXAMPLE_OUTPUT}
12
+ ```
13
+
14
+ You are to output a JSON object containing a subset of these IDs, corresponding to the lines that should start a new block. Each partition should be paired with an explanation (please output the explanation first, before giving the line ID). DO NOT include any additional commentary before or after the JSON object, your response should be the JSON object ONLY.
15
+
16
+ Format instructions:
17
+ {format_instructions}
18
+
19
+ Input:
20
+ ```
21
+ {SOURCE_CODE}
22
+ ```
@@ -0,0 +1 @@
1
+ Your purpose is to partition {SOURCE_LANGUAGE} code into self-contained logical blocks.