bioguider 0.2.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. bioguider/__init__.py +0 -0
  2. bioguider/agents/__init__.py +0 -0
  3. bioguider/agents/agent_task.py +92 -0
  4. bioguider/agents/agent_tools.py +176 -0
  5. bioguider/agents/agent_utils.py +504 -0
  6. bioguider/agents/collection_execute_step.py +182 -0
  7. bioguider/agents/collection_observe_step.py +125 -0
  8. bioguider/agents/collection_plan_step.py +156 -0
  9. bioguider/agents/collection_task.py +184 -0
  10. bioguider/agents/collection_task_utils.py +142 -0
  11. bioguider/agents/common_agent.py +137 -0
  12. bioguider/agents/common_agent_2step.py +215 -0
  13. bioguider/agents/common_conversation.py +61 -0
  14. bioguider/agents/common_step.py +85 -0
  15. bioguider/agents/consistency_collection_step.py +102 -0
  16. bioguider/agents/consistency_evaluation_task.py +57 -0
  17. bioguider/agents/consistency_evaluation_task_utils.py +14 -0
  18. bioguider/agents/consistency_observe_step.py +110 -0
  19. bioguider/agents/consistency_query_step.py +77 -0
  20. bioguider/agents/dockergeneration_execute_step.py +186 -0
  21. bioguider/agents/dockergeneration_observe_step.py +154 -0
  22. bioguider/agents/dockergeneration_plan_step.py +158 -0
  23. bioguider/agents/dockergeneration_task.py +158 -0
  24. bioguider/agents/dockergeneration_task_utils.py +220 -0
  25. bioguider/agents/evaluation_installation_task.py +270 -0
  26. bioguider/agents/evaluation_readme_task.py +767 -0
  27. bioguider/agents/evaluation_submission_requirements_task.py +172 -0
  28. bioguider/agents/evaluation_task.py +206 -0
  29. bioguider/agents/evaluation_tutorial_task.py +169 -0
  30. bioguider/agents/evaluation_tutorial_task_prompts.py +187 -0
  31. bioguider/agents/evaluation_userguide_prompts.py +179 -0
  32. bioguider/agents/evaluation_userguide_task.py +154 -0
  33. bioguider/agents/evaluation_utils.py +127 -0
  34. bioguider/agents/identification_execute_step.py +181 -0
  35. bioguider/agents/identification_observe_step.py +104 -0
  36. bioguider/agents/identification_plan_step.py +140 -0
  37. bioguider/agents/identification_task.py +270 -0
  38. bioguider/agents/identification_task_utils.py +22 -0
  39. bioguider/agents/peo_common_step.py +64 -0
  40. bioguider/agents/prompt_utils.py +253 -0
  41. bioguider/agents/python_ast_repl_tool.py +69 -0
  42. bioguider/agents/rag_collection_task.py +130 -0
  43. bioguider/conversation.py +67 -0
  44. bioguider/database/code_structure_db.py +500 -0
  45. bioguider/database/summarized_file_db.py +146 -0
  46. bioguider/generation/__init__.py +39 -0
  47. bioguider/generation/benchmark_metrics.py +610 -0
  48. bioguider/generation/change_planner.py +189 -0
  49. bioguider/generation/document_renderer.py +157 -0
  50. bioguider/generation/llm_cleaner.py +67 -0
  51. bioguider/generation/llm_content_generator.py +1128 -0
  52. bioguider/generation/llm_injector.py +809 -0
  53. bioguider/generation/models.py +85 -0
  54. bioguider/generation/output_manager.py +74 -0
  55. bioguider/generation/repo_reader.py +37 -0
  56. bioguider/generation/report_loader.py +166 -0
  57. bioguider/generation/style_analyzer.py +36 -0
  58. bioguider/generation/suggestion_extractor.py +436 -0
  59. bioguider/generation/test_metrics.py +189 -0
  60. bioguider/managers/benchmark_manager.py +785 -0
  61. bioguider/managers/evaluation_manager.py +215 -0
  62. bioguider/managers/generation_manager.py +686 -0
  63. bioguider/managers/generation_test_manager.py +107 -0
  64. bioguider/managers/generation_test_manager_v2.py +525 -0
  65. bioguider/rag/__init__.py +0 -0
  66. bioguider/rag/config.py +117 -0
  67. bioguider/rag/data_pipeline.py +651 -0
  68. bioguider/rag/embedder.py +24 -0
  69. bioguider/rag/rag.py +138 -0
  70. bioguider/settings.py +103 -0
  71. bioguider/utils/code_structure_builder.py +59 -0
  72. bioguider/utils/constants.py +135 -0
  73. bioguider/utils/default.gitignore +140 -0
  74. bioguider/utils/file_utils.py +215 -0
  75. bioguider/utils/gitignore_checker.py +175 -0
  76. bioguider/utils/notebook_utils.py +117 -0
  77. bioguider/utils/pyphen_utils.py +73 -0
  78. bioguider/utils/python_file_handler.py +65 -0
  79. bioguider/utils/r_file_handler.py +551 -0
  80. bioguider/utils/utils.py +163 -0
  81. bioguider-0.2.52.dist-info/LICENSE +21 -0
  82. bioguider-0.2.52.dist-info/METADATA +51 -0
  83. bioguider-0.2.52.dist-info/RECORD +84 -0
  84. bioguider-0.2.52.dist-info/WHEEL +4 -0
@@ -0,0 +1,767 @@
1
+
2
+ import logging
3
+ from pathlib import Path
4
+ from typing import Callable
5
+ from langchain.prompts import ChatPromptTemplate
6
+ from langchain_openai.chat_models.base import BaseChatOpenAI
7
+
8
+ from bioguider.agents.prompt_utils import EVALUATION_INSTRUCTION
9
+ from bioguider.utils.gitignore_checker import GitignoreChecker
10
+
11
+ from .evaluation_utils import compute_readability_metrics, run_llm_evaluation
12
+ from bioguider.agents.agent_utils import (
13
+ read_file, read_license_file,
14
+ summarize_file
15
+ )
16
+ from bioguider.agents.evaluation_task import EvaluationTask
17
+ from bioguider.utils.constants import (
18
+ DEFAULT_TOKEN_USAGE,
19
+ ProjectMetadata,
20
+ ProjectLevelEvaluationREADMEResult,
21
+ StructuredEvaluationREADMEResult,
22
+ FreeProjectLevelEvaluationREADMEResult,
23
+ FreeFolderLevelEvaluationREADMEResult,
24
+ EvaluationREADMEResult,
25
+ )
26
+ from bioguider.utils.utils import get_overall_score, increase_token_usage
27
+ from bioguider.rag.config import configs
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+ README_PROJECT_LEVEL_SYSTEM_PROMPT = """
32
+ You are an expert in evaluating the quality of README files in software repositories.
33
+ Your task is to analyze the provided README file and identify if it is a project-level README file or a folder-level README file.
34
+
35
+ ---
36
+
37
+ ### **Classification Guidelines**
38
+
39
+ - **Project-level README**:
40
+ - Located in the repository root (e.g., `/`, `.github/`, or `docs/`).
41
+ - Contains project-wide content: overview, installation steps, global usage, badges, links to main docs or contributing guide.
42
+
43
+ - **Folder-level README**:
44
+ - Located inside a subfolder.
45
+ - Describes the specific folder: its purpose, contents, how to use or run what's inside, local instructions.
46
+
47
+ ---
48
+
49
+ ### **Output Format**
50
+ Based solely on the file's **path**, **name**, and **content**, classify the README as either a **project-level** or **folder-level** README.
51
+ Output **exactly** the following format:
52
+
53
+ **FinalAnswer**
54
+ **Project-level:** [Yes / No]
55
+
56
+ ---
57
+
58
+ ### **README Path**
59
+ {readme_path}
60
+
61
+ ---
62
+
63
+ ### **README content**
64
+ {readme_content}
65
+
66
+ """
67
+
68
+ STRUCTURED_EVALUATION_README_SYSTEM_PROMPT = """
69
+ You are an expert in evaluating the quality of README files in software repositories.
70
+ Your task is to analyze the provided README file and generate a structured quality assessment based on the following criteria.
71
+ If a LICENSE file is present in the repository, its content will also be provided to support your evaluation of license-related criteria.
72
+ You **must** provide the evaluation score in your response.
73
+ ---
74
+
75
+ ### **Evaluation Criteria**
76
+
77
+ 1. **Available**: Is the README accessible and present?
78
+ * Output: `Yes` or `No`
79
+
80
+ 2. **Readability**: Evaluate based on readability metrics AND identify specific errors/issues in the text.
81
+ * Output: a number between 0 and 100 representing the overall quality rating.
82
+ * **IMPORTANT**: You MUST identify and list ALL errors and anomalies found in the text, including but not limited to:
83
+ - **Typos and spelling errors**: Misspelled words (e.g., "succesfully" → "successfully", "maintaine" → "maintained", "analysi" → "analysis")
84
+ - **Malformed links**: URLs with missing or incorrect syntax (e.g., "https//example.com" missing colon ":", "](https//..." missing ":")
85
+ - **Markdown syntax errors**:
86
+ * Headers without spaces (e.g., "#Header" should be "# Header")
87
+ * Lists without spaces (e.g., "-item" should be "- item")
88
+ * Code fence syntax issues, blockquote errors, etc.
89
+ - **Image syntax errors**:
90
+ * Extra space between brackets and parentheses: "![alt] (url)" should be "![alt](url)" - NO SPACE between ] and (
91
+ * Missing exclamation mark: "[alt](url)" should be "![alt](url)" for images
92
+ * Check EVERY image tag carefully for spacing errors
93
+ - **Domain-specific term errors**: Incorrect biological/technical terms (e.g., "single sell" → "single cell", "genomis" → "genomics", "spacial" → "spatial")
94
+ - **Grammar and punctuation issues**: Missing punctuation, incorrect word forms
95
+ - **Inconsistencies**: Version numbers, dates, names that don't match
96
+ - **Formatting issues**: Misaligned tables, broken formatting, inconsistent styling
97
+ - **ANY OTHER ANOMALIES**: Use your judgment to identify anything that looks wrong, unusual, or inconsistent
98
+ * Look for patterns that seem off (e.g., repeated characters, truncated words)
99
+ * Identify structural problems (e.g., duplicate sections, missing sections)
100
+ * Note any content that seems corrupted or malformed
101
+ * If something looks suspicious or incorrect, report it even if it doesn't fit a specific category
102
+ - **Do not** make up errors - only report errors that are actually present in the text
103
+ * For EACH error found, you must provide:
104
+ - The exact text snippet containing the error
105
+ - The type of error (typo/link/markdown/image_syntax/bio_term/grammar/inconsistency/formatting/other)
106
+ - The suggested correction
107
+ - If the error doesn't fit a standard category, use "other" and describe the issue
108
+ * **COMPREHENSIVE SCANNING APPROACH**:
109
+ - **Image syntax**: Scan the entire README for image tags "![...](...)" - check if there's a space between "]" and "("
110
+ - **Links**: Check ALL URLs for missing colons, broken syntax, or malformed markdown
111
+ - **Markdown structure**: Verify headers, lists, code blocks, tables for proper syntax
112
+ - **Word-level**: Look for truncated words (missing last letters), unusual character patterns
113
+ - **Context awareness**: If something doesn't make sense in context, it might be an error
114
+ - **Trust your judgment**: Report anything that looks suspicious, even if you're not 100% sure of the category
115
+ * **Grade Level**:
116
+ - **85-100**: The README is exceptionally clear, polished, and engaging with NO errors. It reads smoothly, with minimal effort required from the reader.
117
+ - **65-84**: The README is clear and easy to understand, with minor errors (1-3 small typos or issues).
118
+ - **45-64**: The README has noticeable errors (4-10 typos, link issues, or markdown problems) that impact readability.
119
+ - **0-44**: The README has numerous errors (10+ typos, broken links, markdown issues) making it difficult to read and unprofessional.
120
+
121
+ 3. **Project Purpose**: Is the project's goal or function clearly stated?
122
+ * Output: `Yes` or `No`
123
+ * Provide suggestions if unclear
124
+
125
+ 4. **Hardware and Software Requirements**: Are hardware/software specs and compatibility details included?
126
+ * Output: a number between 0 and 100 representing the overall quality rating.
127
+ * Suggest how to improve the section if needed
128
+ * **Grade Level**:
129
+ - **85-100**: The README provides a clear and comprehensive guide to the tutorial, with all necessary steps and information provided.
130
+ - **65-84**: The README provides a clear and comprehensive guide to the tutorial, with most necessary steps and information provided.
131
+ - **45-64**: The README provides a clear and comprehensive guide to the tutorial, with some necessary steps and information provided.
132
+ - **0-44**: The README does not provide a clear and comprehensive guide to the tutorial, with no necessary steps and information provided.
133
+
134
+ 5. **Dependencies**: Are all necessary software libraries and dependencies clearly listed?
135
+ * Output: a number between 0 and 100 representing the overall quality rating.
136
+ * Suggest improvements if applicable
137
+ * **Grade Level**:
138
+ - **85-100**: The README provides a clear and comprehensive guide to the tutorial, with all necessary steps and information provided.
139
+ - **65-84**: The README provides a clear and comprehensive guide to the tutorial, with most necessary steps and information provided.
140
+ - **45-64**: The README provides a clear and comprehensive guide to the tutorial, with some necessary steps and information provided.
141
+ - **0-44**: The README does not provide a clear and comprehensive guide to the tutorial, with no necessary steps and information provided.
142
+
143
+ 6. **License Information**: Is license type clearly indicated?
144
+ * Output: `Yes` or `No`
145
+ * Suggest improvement if missing or unclear
146
+
147
+ 7. **Author / Contributor Info**: Are contributor or maintainer details provided?
148
+ * Output: `Yes` or `No`
149
+ * Suggest improvement if missing
150
+
151
+ 8. **Overall Score**: Give an overall quality rating of the README.
152
+ * Output: a number between 0 and 100 representing the overall quality rating.
153
+ * **Grade Level**:
154
+ - **85-100**: The README provides a clear and comprehensive guide to the tutorial, with all necessary steps and information provided.
155
+ - **65-84**: The README provides a clear and comprehensive guide to the tutorial, with most necessary steps and information provided.
156
+ - **45-64**: The README provides a clear and comprehensive guide to the tutorial, with some necessary steps and information provided.
157
+ - **0-44**: The README does not provide a clear and comprehensive guide to the tutorial, with no necessary steps and information provided.
158
+
159
+ ---
160
+
161
+ ### **Readability Metrics**
162
+ * **Flesch Reading Ease**: `{flesch_reading_ease}` (A higher score is better, with 60-70 being easily understood by most adults).
163
+ * **Flesch-Kincaid Grade Level**: `{flesch_kincaid_grade}` (Represents the US school-grade level needed to understand the text).
164
+ * **Gunning Fog Index**: `{gunning_fog_index}` (A score above 12 is generally considered too hard for most people).
165
+ * **SMOG Index**: `{smog_index}` (Estimates the years of education needed to understand the text).
166
+
167
+ ---
168
+
169
+ ### **Final Report Ouput**
170
+ Your final report must **exactly match** the following format. Do not add or omit any sections.
171
+
172
+ **FinalAnswer**
173
+ **Available:** [Yes / No]
174
+ **Readability:**
175
+ * score: a number between 0 and 100 representing the overall quality rating.
176
+ * error_count: <total number of ALL errors found (typos + links + markdown + bio_terms + grammar + image_syntax + inconsistencies + formatting + other)>
177
+ * errors_found: <list ALL errors with format: "ERROR_TYPE | original text snippet | suggested fix | explanation">
178
+ * suggestions: <general suggestions to improve README readability>
179
+
180
+ Note: Be thorough and comprehensive. Report EVERY issue you find, even if you're uncertain about its category. Use "other" category for unusual or ambiguous errors.
181
+ **Project Purpose:**
182
+ * score: [Yes / No]
183
+ * suggestions: <suggestions to improve project purpose.>
184
+ **Hardware and software spec and compatibility description:**
185
+ * score: a number between 0 and 100 representing the overall quality rating.
186
+ * suggestions: <suggestions to improve **hardware and software** description>
187
+ **Dependencies clearly stated:**
188
+ * score: a number between 0 and 100 representing the overall quality rating.
189
+ * suggestions: <suggestions to improve **Dependencies** description>
190
+ **License Information Included:**
191
+ * score: [Yes / No]
192
+ * suggestions: <suggestions to improve **License Information**>
193
+ ** Code contributor / Author information included
194
+ * score: [Yes / No]
195
+ **Overall Score:** a number between 0 and 100 representing the overall quality rating.
196
+
197
+ ---
198
+
199
+ ### **README Path**
200
+ {readme_path}
201
+
202
+ ---
203
+
204
+ ### **README content**
205
+ {readme_content}
206
+
207
+ ---
208
+
209
+ ### **LICENSE Path**
210
+ {license_path}
211
+
212
+ ---
213
+
214
+ ### **LICENSE Summarized Content**
215
+ {license_summarized_content}
216
+
217
+ """
218
+
219
+ PROJECT_LEVEL_EVALUATION_README_SYSTEM_PROMPT = """
220
+ You are an expert in evaluating the quality of README files in software repositories.
221
+ Your task is to analyze the provided project-level README file and generate a comprehensive quality report.
222
+ You will be given:
223
+ 1. A README file.
224
+ 2. A structured evaluation of the README file and its reasoning process.
225
+
226
+ ---
227
+
228
+ ### **Output Format**
229
+ Your output must **exactly match** the following format. Do not add or omit any sections.
230
+
231
+ **FinalAnswer**
232
+ **Available:**
233
+ <Your assessment and suggestion here>
234
+ **Readability:**
235
+ <Your assessment and suggestion here - MUST include ALL specific errors found>
236
+ **Project Purpose:**
237
+ <Your assessment and suggestion here>
238
+ **Hardware and software spec and compatibility description:**
239
+ <Your assessment and suggestion here>
240
+ **Dependencies clearly stated:**
241
+ <Your assessment and suggestion here>
242
+ **License Information Included:**
243
+ <Your assessment and suggestion here>
244
+ **Code contributor / Author information included
245
+ <Your assessment and suggestion here>
246
+ **Overall Score:**
247
+ <Your assessment and suggestion here>
248
+ ---
249
+
250
+ ### **Instructions**
251
+ 1. Based on the provided structured evaluation and its reasoning process, generate a free evaluation of the README file.
252
+ 2. Focus on the explanation of assessment in structured evaluation and how to improve the README file based on the structured evaluation and its reasoning process.
253
+ * For each suggestion to improve the README file, you **must provide some examples** of the original text snippet and the improving comments.
254
+ 3. For each item in the structured evaluation, provide a detailed assessment followed by specific, actionable comments for improvement.
255
+ 4. **CRITICAL for Readability section**: You MUST report ALL specific errors found in the structured evaluation:
256
+ * List EVERY error of ANY type - typos, links, markdown, images, bio terms, inconsistencies, formatting issues, or any other anomaly
257
+ * DO NOT limit yourself to just 3-4 examples - report ALL errors found
258
+ * Use the format: "Line X: 'original text' → 'corrected text' (ERROR_TYPE)"
259
+ * Group errors by type for clarity (typos, links, markdown, image_syntax, bio terms, inconsistencies, formatting, other)
260
+ * **EXTENSIBILITY**: If you find an error that doesn't fit the standard categories, still report it under "Other Errors" with a clear description
261
+ * **COMPREHENSIVE REVIEW**: Read through the entire README carefully, looking for anything unusual, incorrect, or inconsistent
262
+ * Trust your language understanding to identify problems even if they're not explicitly listed in the categories
263
+ 5. Your improvement suggestions must also include the original text snippet and the improving comments.
264
+ 6. In the **FinalAnswer** of output, in each section output, please first give a detailed explanation of the assessment, and then provide the detailed suggestion for improvement. If you think the it is good enough, you can say so.
265
+ The following is an example of the output format:
266
+ **FinalAnswer**
267
+ **Available:**
268
+ Detailed explanation of the assessment. Such as: The README file is present in the repository. The content of the file has been shared completely and is accessible. This confirms the availability of the README documentation for evaluation. There's no issue with availability.
269
+ Detailed suggestion for improvement. Such as: Add a brief introductory section summarizing the project and its main purpose would help orient readers.
270
+ **Readability:**
271
+ Detailed explanation of the assessment. Such as: The README is relatively easy to read for someone around the sixth-grade level. While the technical details provided are moderately easy to understand for those familiar with programming and command-line tools, newbies or non-technical users might face challenges due to jargon and lack of introductory explanations.
272
+
273
+ **ERRORS FOUND** (list ALL errors from structured evaluation):
274
+ Typos and Spelling Errors:
275
+ - Line 1: "succesfully" → "successfully" (typo)
276
+ - Line 2: "maintaine" → "maintained" (typo)
277
+ - ... (list ALL typos found)
278
+
279
+ Malformed Links:
280
+ - Line 1: "](https//www.example.com)" → "](https://www.example.com)" (missing colon)
281
+ - ... (list ALL link errors found)
282
+
283
+ Markdown Syntax Errors:
284
+ - Line 5: "#Seurat" → "# Seurat" (missing space after #)
285
+ - ... (list ALL markdown errors found)
286
+
287
+ Image Syntax Errors:
288
+ - Line 1: "[![Badge] (url)" → "[![Badge](url)" (extra space between ] and ()
289
+ - Line 2: "![Alt] (url)" → "![Alt](url)" (extra space between ] and ()
290
+ - ... (list ALL image syntax errors found - check EVERY image tag)
291
+
292
+ Bio Term Errors:
293
+ - Line 7: "single sell" → "single cell" (incorrect bio term)
294
+ - ... (list ALL bio term errors found)
295
+
296
+ Inconsistencies:
297
+ - Line X: "version 5.0" vs "version 5.1" elsewhere → inconsistent version numbers
298
+ - ... (list ALL inconsistencies found)
299
+
300
+ Formatting Issues:
301
+ - Line X: Table columns misaligned
302
+ - ... (list ALL formatting issues found)
303
+
304
+ Other Errors/Anomalies:
305
+ - Line X: "text text..." → appears truncated or corrupted
306
+ - Line Y: Duplicate section heading
307
+ - ... (list ANY other suspicious issues you notice)
308
+
309
+ General Suggestions:
310
+ - Add a brief introductory section summarizing the project and its main purpose would help orient readers.
311
+ - Break down long instructions into smaller bullet points.
312
+ - Proofread for typos and grammar errors before publishing.
313
+ **Project Purpose:**
314
+ Detailed explanation of the assessment. Such as: The README indirectly describes project activities like benchmarking and assessing functionalities using LLMs and tools like Poetry. However, it lacks a direct statement that defines the overarching project goals or explains who the intended audience is.
315
+ Detailed suggestion for improvement. Such as:
316
+ - Including a clear project purpose at the beginning, such as: "This project provides a framework for evaluating tabular data models using large language model (LLM)-based assessments. Developers and researchers interested in benchmarking data model performance will find this repository particularly useful."
317
+ - <original text snippet> - <improving comments>
318
+ - <original text snippet> - <improving comments>
319
+ - ...
320
+ **Hardware and software spec and compatibility description:**
321
+ Detailed explanation of the assessment. Such as: The README provides partial information about software requirements, emphasizing tools like Poetry. Instructions regarding the setup of `.env` files and API keys are also provided. However, it doesn't specify hardware considerations like memory requirements or explain whether the software is compatible with particular operating systems. This omission can limit usability for certain users.
322
+ Detailed suggestion for improvement. Such as:
323
+ - Adding a subsection titled "Hardware Requirements" to outline memory, processor, or other computational dependencies for running benchmarks effectively.
324
+ - <original text snippet> - <improving comments>
325
+ - <original text snippet> - <improving comments>
326
+ - ...
327
+ **Dependencies clearly stated:**
328
+ Detailed explanation of the assessment. Such as: Dependencies are referenced sporadically throughout the README (e.g., using Poetry to install certain tools). However, there isn't a dedicated section that consolidates these into a simple and easy-to-follow format. This could hinder understanding, especially for users looking to quickly identify and install necessary dependencies.
329
+ Detailed suggestion for improvement. Such as:
330
+ - The dependencies are listed in the README and requirements.txt file. No need to improve.
331
+ **License Information Included:**
332
+ Detailed explanation of the assessment. Such as: The README mentions the MIT license, which is known for its permissive nature and widespread acceptance. The license information is clear and understandable. No improvements are necessary here.
333
+ Detailed suggestion for improvement. Such as: No need to improve.
334
+ **Code contributor / Author information included:**
335
+ Detailed explanation of the assessment. Such as: The README does not contain a section that credits contributors or maintains lines of communication for potential users or collaborators. This is an important omission, as it fails to acknowledge authors' efforts or encourage interaction.
336
+ Detailed suggestion for improvement. Such as:
337
+ - Including a new "Contributors" section to credit the developers, provide contact information (e.g., email or GitHub profiles), or invite collaboration.
338
+ - <original text snippet> - <improving comments>
339
+ **Overall Score:**
340
+ Detailed explanation of the assessment. Such as: The README is relatively easy to read for someone around the sixth-grade level. While the technical details provided are moderately easy to understand for those familiar with programming and command-line tools, newbies or non-technical users might face challenges due to jargon and lack of introductory explanations.
341
+ Detailed suggestion for improvement. Such as:
342
+ - Add a brief introductory section summarizing the project and its main purpose would help orient readers.
343
+ - <original text snippet> - <improving comments>
344
+ - <original text snippet> - <improving comments>
345
+ - ...
346
+ - Break down long instructions into smaller bullet points.
347
+
348
+ ---
349
+
350
+ ### **Structured Evaluation and Reasoning Process**
351
+ {structured_evaluation}
352
+
353
+ ---
354
+
355
+ ### **README Path**
356
+ {readme_path}
357
+
358
+ ---
359
+
360
+ ### **README content**
361
+ {readme_content}
362
+
363
+ """
364
+
365
+ FOLDER_LEVEL_EVALUATION_README_SYSTEM_PROMPT = """
366
+ You are an expert in evaluating the quality of README files in software repositories.
367
+ Your task is to analyze the provided README file and generate a comprehensive quality report.
368
+
369
+ ---
370
+
371
+ ### **Evaluation Criteria**
372
+
373
+ The README file is a **folder-level** file, use the following criteria instead.
374
+
375
+ For each criterion below, provide a brief assessment followed by specific, actionable comments for improvement.
376
+
377
+ **1. Folder Description**
378
+ * **Assessment**: [Your evaluation of whether it Provides a clear **description** of what the folder contains (e.g., modules, scripts, data).]
379
+ * **Improvement Suggestions**:
380
+ * **Original text:** [Quote a specific line/section from the README.]
381
+ * **Improving comments:** [Provide your suggestions to improve clarity.]
382
+
383
+ **2. Folder Purpose**
384
+ * **Assessment**: [Your evaluation of whether it explains the **purpose** or **role** of the components inside this subfolder.]
385
+ * **Improvement Suggestions**:
386
+ * **Original text:** [Quote text related to purpose.]
387
+ * **Improving comments:** [Provide your suggestions.]
388
+
389
+ **3. Usage**
390
+ * **Assessment**: [Your evaluation of whether it includes **usage instructions** specific to this folder (e.g., commands, import paths, input/output files).]
391
+ * **Improvement Suggestions**:
392
+ * **Original text:** [Quote text related to usage.]
393
+ * **Improving comments:** [Provide your suggestions.]
394
+
395
+ **4. Readability Analysis**
396
+ * **Flesch Reading Ease**: `{flesch_reading_ease}` (A higher score is better, with 60-70 being easily understood by most adults).
397
+ * **Flesch-Kincaid Grade Level**: `{flesch_kincaid_grade}` (Represents the US school-grade level needed to understand the text).
398
+ * **Gunning Fog Index**: `{gunning_fog_index}` (A score above 12 is generally considered too hard for most people).
399
+ * **SMOG Index**: `{smog_index}` (Estimates the years of education needed to understand the text).
400
+ * **Assessment**: Based on these scores, evaluate the overall readability and technical complexity of the language used.
401
+
402
+ ---
403
+
404
+ ### Final Report Format
405
+
406
+ #### Your output **must exactly match** the following template:
407
+
408
+ **FinalAnswer**
409
+ * **Score:** [Poor / Fair / Good / Excellent]
410
+ * **Key Strengths**: <brief summary of the README's strongest points in 2-3 sentences>
411
+ * **Overall Improvement Suggestions:**
412
+ - "Original text snippet 1" - Improving comment 1
413
+ - "Original text snippet 2" - Improving comment 2
414
+ - ...
415
+
416
+ #### Notes
417
+
418
+ * **Score**: Overall quality rating, could be Poor / Fair / Good / Excellent.
419
+ * **Key Strengths**: Briefly highlight the README's strongest aspects.
420
+ * **Improvement Suggestions**: Provide concrete snippets and suggested improvements.
421
+
422
+ ---
423
+
424
+ ### **README path:**
425
+ {readme_path}
426
+
427
+ ---
428
+
429
+ ### **README Content:**
430
+ {readme_content}
431
+ """
432
+
433
+ class EvaluationREADMETask(EvaluationTask):
434
+ def __init__(
435
+ self,
436
+ llm: BaseChatOpenAI,
437
+ repo_path: str,
438
+ gitignore_path: str,
439
+ meta_data: ProjectMetadata | None = None,
440
+ step_callback: Callable | None = None,
441
+ summarized_files_db = None,
442
+ collected_files: list[str] | None = None,
443
+ ):
444
+ super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback, summarized_files_db)
445
+ self.evaluation_name = "README Evaluation"
446
+ self.collected_files = collected_files
447
+
448
+ def _project_level_evaluate(self, readme_files: list[str]) -> tuple[dict, dict]:
449
+ """
450
+ Evaluate if the README files are a project-level README file.
451
+ """
452
+ total_token_usage = {**DEFAULT_TOKEN_USAGE}
453
+ project_level_evaluations = {}
454
+ for readme_file in readme_files:
455
+ full_path = Path(self.repo_path, readme_file)
456
+ readme_content = read_file(full_path)
457
+ if readme_content is None or len(readme_content.strip()) == 0:
458
+ logger.error(f"Error in reading file {readme_file}")
459
+ project_level_evaluations[readme_file] = {
460
+ "project_level": "/" in readme_file,
461
+ "project_level_reasoning_process": f"Error in reading file {readme_file}" \
462
+ if readme_content is None else f"{readme_file} is an empty file.",
463
+ }
464
+ continue
465
+ system_prompt = ChatPromptTemplate.from_template(
466
+ README_PROJECT_LEVEL_SYSTEM_PROMPT
467
+ ).format(
468
+ readme_path=readme_file,
469
+ readme_content=readme_content,
470
+ )
471
+ response, token_usage, reasoning_process = run_llm_evaluation(
472
+ llm=self.llm,
473
+ system_prompt=system_prompt,
474
+ instruction_prompt=EVALUATION_INSTRUCTION,
475
+ schema=ProjectLevelEvaluationREADMEResult,
476
+ )
477
+ total_token_usage = increase_token_usage(total_token_usage, token_usage)
478
+ self.print_step(step_output=f"README: {readme_file} project level README")
479
+ project_level_evaluations[readme_file] = {
480
+ "project_level": response.project_level,
481
+ "project_level_reasoning_process": reasoning_process,
482
+ }
483
+
484
+ return project_level_evaluations, total_token_usage
485
+
486
    def _structured_evaluate(self, readme_project_level: dict[str, dict] | None = None):
        """ Evaluate project-level READMEs against a structured rubric:
        available: bool
        readability: score and suggestion
        project purpose: bool, suggestion
        hardware and software spec and compatibility description: score and suggestion
        dependencies clearly stated: score and suggestion
        license information included: bool and suggestion
        Code contributor / author information included: bool and suggestion
        overall score: weighted combination of the above

        Args:
            readme_project_level: output of _project_level_evaluate; only
                entries with project_level=True are evaluated here.

        Returns:
            (per-file dict of {"evaluation", "reasoning_process"}, token usage),
            or (None, zeroed usage) when no classification was supplied.
        """
        total_token_usage = {**DEFAULT_TOKEN_USAGE}
        if readme_project_level is None:
            return None, total_token_usage

        # Summarize the repo LICENSE once (shared by every README prompt);
        # fall back to "N/A" when no license file exists.
        license_content, license_path = read_license_file(self.repo_path)
        license_summarized_content = summarize_file(
            llm=self.llm,
            name=license_path,
            content=license_content,
            level=6,
            summary_instructions="What license is the repository using?",
        ) if license_content is not None else "N/A"
        license_path = license_path if license_content is not None else "N/A"
        structured_readme_evaluations = {}
        for readme_file in readme_project_level.keys():
            project_level = readme_project_level[readme_file]["project_level"]
            if not project_level:
                # Folder-level READMEs are handled by the free evaluation only.
                continue
            full_path = Path(self.repo_path, readme_file)
            readme_content = read_file(full_path)
            if readme_content is None:
                logger.error(f"Error in reading file {readme_file}")
                continue
            if len(readme_content.strip()) == 0:
                # Empty file: emit an all-zero/False result without calling the LLM.
                structured_readme_evaluations[readme_file] = {
                    "evaluation": StructuredEvaluationREADMEResult(
                        available_score=False,
                        readability_score=0,
                        readability_error_count=0,
                        readability_errors_found=[],
                        readability_suggestions="No readability provided",
                        project_purpose_score=False,
                        project_purpose_suggestions="No project purpose provided",
                        hardware_and_software_spec_score=0,
                        hardware_and_software_spec_suggestions="No hardware and software spec provided",
                        dependency_score=0,
                        dependency_suggestions="No dependency provided",
                        license_score=False,
                        license_suggestions="No license information",
                        contributor_author_score=False,
                        overall_score=0,
                    ),
                    "reasoning_process": f"{readme_file} is an empty file.",
                }
                continue
            # Pre-compute deterministic readability metrics and feed them to
            # the prompt so the LLM does not have to estimate them.
            flesch_reading_ease, flesch_kincaid_grade, gunning_fog_index, smog_index = \
                compute_readability_metrics(readme_content)
            system_prompt = ChatPromptTemplate.from_template(
                STRUCTURED_EVALUATION_README_SYSTEM_PROMPT
            ).format(
                readme_path=readme_file,
                readme_content=readme_content,
                license_path=license_path,
                license_summarized_content=license_summarized_content,
                flesch_reading_ease=flesch_reading_ease,
                flesch_kincaid_grade=flesch_kincaid_grade,
                gunning_fog_index=gunning_fog_index,
                smog_index=smog_index,
            )

            response, token_usage, reasoning_process = run_llm_evaluation(
                llm=self.llm,
                system_prompt=system_prompt,
                instruction_prompt=EVALUATION_INSTRUCTION,
                schema=StructuredEvaluationREADMEResult,
                chain=True,
            )
            # Overall score is computed locally as a weighted mean: readability
            # and project purpose weigh 3x the remaining four criteria.
            response.overall_score = get_overall_score(
                [
                    response.readability_score,
                    response.project_purpose_score,
                    response.hardware_and_software_spec_score,
                    response.dependency_score,
                    response.license_score,
                    response.contributor_author_score,
                ],
                [3, 3, 1, 1, 1, 1],
            )
            self.print_step(step_output=f"README: {readme_file} structured evaluation")
            self.print_step(step_output=reasoning_process)
            structured_readme_evaluations[readme_file] = {
                "evaluation": response,
                "reasoning_process": reasoning_process,
            }
            total_token_usage = increase_token_usage(total_token_usage, token_usage)

        return structured_readme_evaluations, total_token_usage
584
+
585
+
586
+ def _free_project_level_readme_evaluate(
587
+ self,
588
+ readme_file: str,
589
+ structured_reasoning_process: str,
590
+ ) -> tuple[FreeProjectLevelEvaluationREADMEResult | None, dict, str]:
591
+ readme_path = Path(self.repo_path, readme_file)
592
+ readme_content = read_file(readme_path)
593
+ if readme_content is None:
594
+ logger.error(f"Error in reading file {readme_file}")
595
+ return None, {**DEFAULT_TOKEN_USAGE}, f"Error in reading file {readme_file}"
596
+ if readme_content.strip() == "":
597
+ return FreeProjectLevelEvaluationREADMEResult(
598
+ available=["Poor"],
599
+ readability=["Poor"],
600
+ project_purpose=["Poor"],
601
+ hardware_and_software_spec=["Poor"],
602
+ dependency=["Poor"],
603
+ license=["Poor"],
604
+ contributor_author=["Poor"],
605
+ overall_score=["Poor"],
606
+ ), {**DEFAULT_TOKEN_USAGE}, f"{readme_file} is an empty file."
607
+
608
+ system_prompt = ChatPromptTemplate.from_template(
609
+ PROJECT_LEVEL_EVALUATION_README_SYSTEM_PROMPT
610
+ ).format(
611
+ readme_path=readme_file,
612
+ readme_content=readme_content,
613
+ structured_evaluation=structured_reasoning_process,
614
+ )
615
+ response, token_usage, reasoning_process = run_llm_evaluation(
616
+ llm=self.llm,
617
+ system_prompt=system_prompt,
618
+ instruction_prompt=EVALUATION_INSTRUCTION,
619
+ schema=FreeProjectLevelEvaluationREADMEResult,
620
+ )
621
+ self.print_step(step_output=f"README: {readme_file} free project level README")
622
+ self.print_step(step_output=reasoning_process)
623
+ return response, token_usage, reasoning_process
624
+
625
+ def _free_folder_level_readme_evaluate(
626
+ self,
627
+ readme_file: str,
628
+ ) -> tuple[FreeFolderLevelEvaluationREADMEResult | None, dict, str]:
629
+ readme_path = Path(self.repo_path, readme_file)
630
+ readme_content = read_file(readme_path)
631
+ if readme_content is None:
632
+ logger.error(f"Error in reading file {readme_file}")
633
+ return None, {**DEFAULT_TOKEN_USAGE}, f"Error in reading file {readme_file}"
634
+ if readme_content.strip() == "":
635
+ return FreeFolderLevelEvaluationREADMEResult(
636
+ score="Poor",
637
+ key_strengths=f"{readme_file} is an empty file.",
638
+ overall_improvement_suggestions=[f"{readme_file} is an empty file."],
639
+ ), {**DEFAULT_TOKEN_USAGE}, f"{readme_file} is an empty file."
640
+
641
+ flesch_reading_ease, flesch_kincaid_grade, gunning_fog_index, smog_index = \
642
+ compute_readability_metrics(readme_content)
643
+ system_prompt = ChatPromptTemplate.from_template(
644
+ FOLDER_LEVEL_EVALUATION_README_SYSTEM_PROMPT
645
+ ).format(
646
+ readme_path=readme_file,
647
+ readme_content=readme_content,
648
+ flesch_reading_ease=flesch_reading_ease,
649
+ flesch_kincaid_grade=flesch_kincaid_grade,
650
+ gunning_fog_index=gunning_fog_index,
651
+ smog_index=smog_index,
652
+ )
653
+ response, token_usage, reasoning_process = run_llm_evaluation(
654
+ llm=self.llm,
655
+ system_prompt=system_prompt,
656
+ instruction_prompt=EVALUATION_INSTRUCTION,
657
+ schema=FreeFolderLevelEvaluationREADMEResult,
658
+ chain=True,
659
+ )
660
+ self.print_step(step_output=f"README: {readme_file} free folder level README")
661
+ self.print_step(step_output=reasoning_process)
662
+ return response, token_usage, reasoning_process
663
+
664
+ def _free_evaluate(
665
+ self,
666
+ readme_project_level: dict[str, dict],
667
+ structured_readme_evaluations: dict[str, dict]
668
+ ):
669
+ readme_files = readme_project_level.keys()
670
+ if readme_files is None or len(readme_files) == 0:
671
+ return None, {**DEFAULT_TOKEN_USAGE}
672
+
673
+ free_readme_evaluations = {}
674
+ total_token_usage = {**DEFAULT_TOKEN_USAGE}
675
+ for readme_file in readme_files:
676
+ readme_path = Path(self.repo_path, readme_file)
677
+ project_level = readme_project_level[readme_file]["project_level"]
678
+ readme_content = read_file(readme_path)
679
+ if readme_content is None:
680
+ logger.error(f"Error in reading file {readme_file}")
681
+ continue
682
+ if project_level:
683
+ evaluation, token_usage, reasoning_process = self._free_project_level_readme_evaluate(
684
+ readme_file=readme_file,
685
+ structured_reasoning_process=structured_readme_evaluations[readme_file]["reasoning_process"],
686
+ )
687
+ if evaluation is None:
688
+ continue
689
+ free_readme_evaluations[readme_file] = {
690
+ "evaluation": evaluation,
691
+ "reasoning_process": reasoning_process,
692
+ }
693
+ total_token_usage = increase_token_usage(total_token_usage, token_usage)
694
+
695
+ else:
696
+ evaluation, token_usage, reasoning_process = self._free_folder_level_readme_evaluate(
697
+ readme_file=readme_file,
698
+ )
699
+ if evaluation is None:
700
+ continue
701
+ free_readme_evaluations[readme_file] = {
702
+ "evaluation": evaluation,
703
+ "reasoning_process": reasoning_process,
704
+ }
705
+ total_token_usage = increase_token_usage(total_token_usage, token_usage)
706
+
707
+ return free_readme_evaluations, total_token_usage
708
+
709
+ def _evaluate(self, files: list[str]) -> tuple[dict[str, EvaluationREADMEResult], dict, list[str]]:
710
+ total_token_usage = {**DEFAULT_TOKEN_USAGE}
711
+ project_level_evaluations, project_level_token_usage = self._project_level_evaluate(files)
712
+ total_token_usage = increase_token_usage(total_token_usage, project_level_token_usage)
713
+ structured_readme_evaluations, structured_token_usage = self._structured_evaluate(project_level_evaluations)
714
+ total_token_usage = increase_token_usage(total_token_usage, structured_token_usage)
715
+ free_readme_evaluations, free_token_usage = self._free_evaluate(project_level_evaluations, structured_readme_evaluations)
716
+ total_token_usage = increase_token_usage(total_token_usage, free_token_usage)
717
+
718
+ # combine result
719
+ combined_evaluations = {}
720
+ for f in files:
721
+ if not f in free_readme_evaluations:
722
+ continue
723
+ project_level = project_level_evaluations[f]["project_level"]
724
+ if project_level:
725
+ combined_evaluations[f] = EvaluationREADMEResult(
726
+ project_level=project_level,
727
+ structured_evaluation=structured_readme_evaluations[f]["evaluation"],
728
+ free_evaluation=free_readme_evaluations[f]["evaluation"],
729
+ structured_reasoning_process=structured_readme_evaluations[f]["reasoning_process"],
730
+ free_reasoning_process=free_readme_evaluations[f]["reasoning_process"],
731
+ )
732
+ else:
733
+ combined_evaluations[f] = EvaluationREADMEResult(
734
+ project_level=project_level,
735
+ structured_evaluation=None,
736
+ free_evaluation=free_readme_evaluations[f]["evaluation"],
737
+ structured_reasoning_process=None,
738
+ free_reasoning_process=free_readme_evaluations[f]["reasoning_process"],
739
+ )
740
+
741
+ return combined_evaluations, total_token_usage, files
742
+
743
+ def _collect_files(self):
744
+ """
745
+ Search for a README file in the repository directory.
746
+ """
747
+ if self.collected_files is not None:
748
+ return self.collected_files
749
+
750
+ possible_readme_files = [
751
+ "readme.md",
752
+ "readme.rst",
753
+ "readme.txt",
754
+ "readme",
755
+ ]
756
+ repo_path = self.repo_path
757
+ gitignore_path = Path(repo_path, ".gitignore")
758
+ gitignore_checker = GitignoreChecker(
759
+ directory=repo_path, gitignore_path=gitignore_path,
760
+ exclude_dir_patterns=configs["file_filters"]["excluded_dirs"],
761
+ exclude_file_patterns=configs["file_filters"]["excluded_files"],
762
+ )
763
+ found_readme_files = gitignore_checker.check_files_and_folders(
764
+ check_file_cb=lambda root_dir, relative_path: Path(relative_path).name.lower() in possible_readme_files,
765
+ )
766
+
767
+ return found_readme_files