janus-llm 3.2.0__py3-none-any.whl → 3.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- janus/__init__.py +3 -3
- janus/_tests/test_cli.py +3 -3
- janus/cli.py +1 -1
- janus/converter/__init__.py +6 -6
- janus/converter/_tests/test_translate.py +3 -231
- janus/embedding/_tests/test_collections.py +2 -2
- janus/embedding/_tests/test_database.py +1 -1
- janus/embedding/_tests/test_vectorize.py +3 -3
- janus/embedding/collections.py +2 -2
- janus/embedding/database.py +1 -1
- janus/embedding/embedding_models_info.py +1 -1
- janus/embedding/vectorize.py +5 -5
- janus/language/_tests/test_combine.py +1 -1
- janus/language/_tests/test_splitter.py +1 -1
- janus/language/alc/_tests/test_alc.py +3 -3
- janus/language/alc/alc.py +5 -5
- janus/language/binary/_tests/test_binary.py +2 -2
- janus/language/binary/binary.py +5 -5
- janus/language/block.py +2 -2
- janus/language/combine.py +3 -3
- janus/language/file.py +2 -2
- janus/language/mumps/_tests/test_mumps.py +3 -3
- janus/language/mumps/mumps.py +5 -5
- janus/language/mumps/patterns.py +1 -1
- janus/language/naive/__init__.py +4 -4
- janus/language/naive/basic_splitter.py +4 -4
- janus/language/naive/chunk_splitter.py +4 -4
- janus/language/naive/registry.py +1 -1
- janus/language/naive/simple_ast.py +5 -5
- janus/language/naive/tag_splitter.py +4 -4
- janus/language/node.py +1 -1
- janus/language/splitter.py +4 -4
- janus/language/treesitter/_tests/test_treesitter.py +3 -3
- janus/language/treesitter/treesitter.py +4 -4
- janus/llm/__init__.py +1 -1
- janus/llm/model_callbacks.py +1 -1
- janus/llm/models_info.py +3 -3
- janus/metrics/_tests/test_bleu.py +1 -1
- janus/metrics/_tests/test_chrf.py +1 -1
- janus/metrics/_tests/test_file_pairing.py +1 -1
- janus/metrics/_tests/test_llm.py +2 -2
- janus/metrics/_tests/test_reading.py +1 -1
- janus/metrics/_tests/test_rouge_score.py +1 -1
- janus/metrics/_tests/test_similarity_score.py +1 -1
- janus/metrics/_tests/test_treesitter_metrics.py +2 -2
- janus/metrics/bleu.py +1 -1
- janus/metrics/chrf.py +1 -1
- janus/metrics/complexity_metrics.py +4 -4
- janus/metrics/file_pairing.py +5 -5
- janus/metrics/llm_metrics.py +1 -1
- janus/metrics/metric.py +7 -7
- janus/metrics/reading.py +1 -1
- janus/metrics/rouge_score.py +1 -1
- janus/metrics/similarity.py +2 -2
- janus/parsers/_tests/test_code_parser.py +1 -1
- janus/parsers/code_parser.py +2 -2
- janus/parsers/doc_parser.py +3 -3
- janus/parsers/eval_parser.py +2 -2
- janus/parsers/reqs_parser.py +3 -3
- janus/parsers/uml.py +1 -2
- janus/prompts/prompt.py +2 -2
- janus/utils/_tests/test_logger.py +1 -1
- janus/utils/_tests/test_progress.py +1 -1
- janus/utils/progress.py +1 -1
- {janus_llm-3.2.0.dist-info → janus_llm-3.2.1.dist-info}/METADATA +1 -1
- janus_llm-3.2.1.dist-info/RECORD +105 -0
- janus_llm-3.2.0.dist-info/RECORD +0 -105
- {janus_llm-3.2.0.dist-info → janus_llm-3.2.1.dist-info}/LICENSE +0 -0
- {janus_llm-3.2.0.dist-info → janus_llm-3.2.1.dist-info}/WHEEL +0 -0
- {janus_llm-3.2.0.dist-info → janus_llm-3.2.1.dist-info}/entry_points.txt +0 -0
janus/__init__.py
CHANGED
@@ -2,10 +2,10 @@ import warnings
|
|
2
2
|
|
3
3
|
from langchain_core._api.deprecation import LangChainDeprecationWarning
|
4
4
|
|
5
|
-
from .converter.translate import Translator
|
6
|
-
from .metrics import * # noqa: F403
|
5
|
+
from janus.converter.translate import Translator
|
6
|
+
from janus.metrics import * # noqa: F403
|
7
7
|
|
8
|
-
__version__ = "3.2.
|
8
|
+
__version__ = "3.2.1"
|
9
9
|
|
10
10
|
# Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
|
11
11
|
warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
|
janus/_tests/test_cli.py
CHANGED
@@ -4,9 +4,9 @@ from unittest.mock import ANY, patch
|
|
4
4
|
|
5
5
|
from typer.testing import CliRunner
|
6
6
|
|
7
|
-
from
|
8
|
-
from
|
9
|
-
from
|
7
|
+
from janus.cli import app, translate
|
8
|
+
from janus.embedding.embedding_models_info import EMBEDDING_MODEL_CONFIG_DIR
|
9
|
+
from janus.llm.models_info import MODEL_CONFIG_DIR
|
10
10
|
|
11
11
|
|
12
12
|
class TestCli(unittest.TestCase):
|
janus/cli.py
CHANGED
janus/converter/__init__.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
from .converter import Converter
|
2
|
-
from .diagram import DiagramGenerator
|
3
|
-
from .document import Documenter, MadLibsDocumenter, MultiDocumenter
|
4
|
-
from .evaluate import Evaluator
|
5
|
-
from .requirements import RequirementsDocumenter
|
6
|
-
from .translate import Translator
|
1
|
+
from janus.converter.converter import Converter
|
2
|
+
from janus.converter.diagram import DiagramGenerator
|
3
|
+
from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
|
4
|
+
from janus.converter.evaluate import Evaluator
|
5
|
+
from janus.converter.requirements import RequirementsDocumenter
|
6
|
+
from janus.converter.translate import Translator
|
@@ -7,37 +7,11 @@ from langchain.schema import Document
|
|
7
7
|
from langchain.schema.embeddings import Embeddings
|
8
8
|
from langchain.schema.vectorstore import VST, VectorStore
|
9
9
|
|
10
|
+
from janus.converter.diagram import DiagramGenerator
|
11
|
+
from janus.converter.requirements import RequirementsDocumenter
|
12
|
+
from janus.converter.translate import Translator
|
10
13
|
from janus.language.block import CodeBlock, TranslatedCodeBlock
|
11
14
|
|
12
|
-
from ..diagram import DiagramGenerator
|
13
|
-
from ..requirements import RequirementsDocumenter
|
14
|
-
from ..translate import Translator
|
15
|
-
|
16
|
-
# from langchain.vectorstores import Chroma
|
17
|
-
|
18
|
-
|
19
|
-
# from ..utils.enums import EmbeddingType
|
20
|
-
|
21
|
-
|
22
|
-
def print_query_results(query, n_results):
|
23
|
-
# print(f"\n{query}")
|
24
|
-
# count = 1
|
25
|
-
# for t in n_results:
|
26
|
-
# short_code = (
|
27
|
-
# (t[0].page_content[0:50] + "..")
|
28
|
-
# if (len(t[0].page_content) > 50)
|
29
|
-
# else t[0].page_content
|
30
|
-
# )
|
31
|
-
# return_index = short_code.find("\n")
|
32
|
-
# if -1 != return_index:
|
33
|
-
# short_code = short_code[0:return_index] + ".."
|
34
|
-
# print(
|
35
|
-
# f"{count}. @ {t[0].metadata['start_line']}-{t[0].metadata['end_line']}"
|
36
|
-
# f" -- {t[1]} -- {short_code}"
|
37
|
-
# )
|
38
|
-
# count += 1
|
39
|
-
pass
|
40
|
-
|
41
15
|
|
42
16
|
class MockCollection(VectorStore):
|
43
17
|
"""Vector store for testing"""
|
@@ -65,14 +39,6 @@ class MockCollection(VectorStore):
|
|
65
39
|
raise NotImplementedError("from_texts() not implemented!")
|
66
40
|
|
67
41
|
|
68
|
-
# class MockEmbeddingsFactory(EmbeddingsFactory):
|
69
|
-
# """Embeddings for testing - uses MockCollection"""
|
70
|
-
#
|
71
|
-
# def get_embeddings(self) -> Embeddings:
|
72
|
-
# return MockCollection()
|
73
|
-
#
|
74
|
-
|
75
|
-
|
76
42
|
class TestTranslator(unittest.TestCase):
|
77
43
|
"""Tests for the Translator class."""
|
78
44
|
|
@@ -105,200 +71,6 @@ class TestTranslator(unittest.TestCase):
|
|
105
71
|
# unit tests anyway
|
106
72
|
self.assertTrue(python_file.exists())
|
107
73
|
|
108
|
-
# def test_embeddings(self):
|
109
|
-
# """Testing access to embeddings"""
|
110
|
-
# vector_store = self.translator.embeddings(EmbeddingType.SOURCE)
|
111
|
-
# self.assertIsInstance(vector_store, Chroma, "Unexpected vector store type!")
|
112
|
-
# self.assertEqual(
|
113
|
-
# 0, vector_store._collection.count(), "Non-empty initial vector store?"
|
114
|
-
# )
|
115
|
-
#
|
116
|
-
# self.translator.set_model("llama")
|
117
|
-
# self.translator._load_parameters()
|
118
|
-
# vector_store = self.translator.embeddings(EmbeddingType.SOURCE)
|
119
|
-
# self.assertIsInstance(vector_store, Chroma)
|
120
|
-
# self.assertEqual(
|
121
|
-
# 0, vector_store._collection.count(), "Non-empty initial vector store?"
|
122
|
-
# )
|
123
|
-
|
124
|
-
# def test_embed_split_source(self):
|
125
|
-
# """Characterize _embed method"""
|
126
|
-
# mock_embeddings = MockEmbeddingsFactory()
|
127
|
-
# self.translator.set_embeddings(mock_embeddings)
|
128
|
-
# self.translator._load_parameters()
|
129
|
-
# input_block = self.translator.splitter.split(self.test_file)
|
130
|
-
# self.assertIsNone(
|
131
|
-
# input_block.text, "Root node of input text shouldn't contain text"
|
132
|
-
# )
|
133
|
-
# self.assertIsNone(input_block.embedding_id, "Precondition failed")
|
134
|
-
#
|
135
|
-
# result = self.translator._embed(
|
136
|
-
# input_block, EmbeddingType.SOURCE, self.test_file.name
|
137
|
-
# )
|
138
|
-
#
|
139
|
-
# self.assertFalse(result, "Nothing to embed, so should have no result")
|
140
|
-
# self.assertIsNone(
|
141
|
-
# input_block.embedding_id, "Embeddings should not have changed")
|
142
|
-
|
143
|
-
# def test_embed_has_values_for_each_non_empty_node(self):
|
144
|
-
# """Characterize our sample fortran file"""
|
145
|
-
# mock_embeddings = MockEmbeddingsFactory()
|
146
|
-
# self.translator.set_embeddings(mock_embeddings)
|
147
|
-
# self.translator._load_parameters()
|
148
|
-
# input_block = self.translator.splitter.split(self.test_file)
|
149
|
-
# self.translator._embed_nodes_recursively(
|
150
|
-
# input_block, EmbeddingType.SOURCE, self.test_file.name
|
151
|
-
# )
|
152
|
-
# has_text_count = 0
|
153
|
-
# has_embeddings_count = 0
|
154
|
-
# nodes = [input_block]
|
155
|
-
# while nodes:
|
156
|
-
# node = nodes.pop(0)
|
157
|
-
# if node.text:
|
158
|
-
# has_text_count += 1
|
159
|
-
# if node.embedding_id:
|
160
|
-
# has_embeddings_count += 1
|
161
|
-
# nodes.extend(node.children)
|
162
|
-
# self.assertEqual(
|
163
|
-
# self.TEST_FILE_EMBEDDING_COUNT,
|
164
|
-
# has_text_count,
|
165
|
-
# "Parsing of test_file has changed!",
|
166
|
-
# )
|
167
|
-
# self.assertEqual(
|
168
|
-
# self.TEST_FILE_EMBEDDING_COUNT,
|
169
|
-
# has_embeddings_count,
|
170
|
-
# "Not all non-empty nodes have embeddings!",
|
171
|
-
# )
|
172
|
-
|
173
|
-
# def test_embed_nodes_recursively(self):
|
174
|
-
# mock_embeddings = MockEmbeddingsFactory()
|
175
|
-
# self.translator.set_embeddings(mock_embeddings)
|
176
|
-
# self.translator._load_parameters()
|
177
|
-
# input_block = self.translator.splitter.split(self.test_file)
|
178
|
-
# self.translator._embed_nodes_recursively(
|
179
|
-
# input_block, EmbeddingType.SOURCE, self.test_file.name
|
180
|
-
# )
|
181
|
-
# nodes = [input_block]
|
182
|
-
# while nodes:
|
183
|
-
# node = nodes.pop(0)
|
184
|
-
# self.assertEqual(node.text is not None, node.embedding_id is not None)
|
185
|
-
# nodes.extend(node.children)
|
186
|
-
|
187
|
-
# @pytest.mark.slow
|
188
|
-
# def test_translate_file_adds_source_embeddings(self):
|
189
|
-
# mock_embeddings = MockEmbeddingsFactory()
|
190
|
-
# self.translator.set_embeddings(mock_embeddings)
|
191
|
-
# self.translator._load_parameters()
|
192
|
-
# vector_store = self.translator.embeddings(EmbeddingType.SOURCE)
|
193
|
-
# self.assertEqual(0, vector_store._add_texts_calls, "precondition")
|
194
|
-
#
|
195
|
-
# self.translator.translate_file(self.test_file)
|
196
|
-
#
|
197
|
-
# self.assertEqual(
|
198
|
-
# self.TEST_FILE_EMBEDDING_COUNT,
|
199
|
-
# vector_store._add_texts_calls,
|
200
|
-
# "Did not find expected source embeddings",
|
201
|
-
# )
|
202
|
-
|
203
|
-
# @pytest.mark.slow
|
204
|
-
# def test_embeddings_usage(self):
|
205
|
-
# """Noodling on use of embeddings
|
206
|
-
# To see results have to uncomment print_query_results() above
|
207
|
-
# """
|
208
|
-
# input_block = self.translator.splitter.split(self.test_file)
|
209
|
-
# self.translator._embed_nodes_recursively(
|
210
|
-
# input_block, EmbeddingType.SOURCE, self.test_file.name
|
211
|
-
# )
|
212
|
-
# vector_store = self.translator.embeddings(EmbeddingType.SOURCE)
|
213
|
-
#
|
214
|
-
# # this symbol has the lowest relevance scores of any in this test, but
|
215
|
-
# # still not very low; multiple embedded nodes contain it
|
216
|
-
# QUERY_STRING = "IWX_BAND_START"
|
217
|
-
# query = self.translator._embeddings._embeddings.embed_query(QUERY_STRING)
|
218
|
-
# n_results = vector_store.similarity_search_by_vector_with_relevance_scores(
|
219
|
-
# embedding=query,
|
220
|
-
# k=10,
|
221
|
-
# where_document={"$contains": QUERY_STRING},
|
222
|
-
# )
|
223
|
-
# self.assertTrue(len(n_results) > 1, "Why was valid symbol not found?")
|
224
|
-
# print_query_results(QUERY_STRING, n_results)
|
225
|
-
|
226
|
-
# in the XYZZY test, the least dissimilar results were the start and finish lines
|
227
|
-
# 0, and 415, which produced a similarity score of 0.47:
|
228
|
-
|
229
|
-
# QUERY_STRING = "XYZZY"
|
230
|
-
# query = self.translator._embeddings.embed_query(QUERY_STRING)
|
231
|
-
# n_results = vector_store.similarity_search_by_vector_with_relevance_scores(
|
232
|
-
# embedding=query,
|
233
|
-
# k=10,
|
234
|
-
# # filter={"end_line": 15},
|
235
|
-
# # filter={"$and": [{"end_line": 15}, {"tokens": {"$gte": 21}}]},
|
236
|
-
# # where_document={"$contains": QUERY_STRING},
|
237
|
-
# )
|
238
|
-
# print_query_results(QUERY_STRING, n_results)
|
239
|
-
# # self.assertTrue(len(n_results) == 0, "Invalid symbol was found?")
|
240
|
-
|
241
|
-
# # only returns a single result because only 1 embedded node contains
|
242
|
-
# # CSV_ICASEARR:
|
243
|
-
# QUERY_STRING = "What is the use of CSV_ICASEARR?"
|
244
|
-
# query = self.translator._embeddings._embeddings.embed_query(QUERY_STRING)
|
245
|
-
# n_results = vector_store.similarity_search_by_vector_with_relevance_scores(
|
246
|
-
# embedding=query,
|
247
|
-
# k=10,
|
248
|
-
# # where_document={"$contains": QUERY_STRING},
|
249
|
-
# where_document={"$contains": "CSV_ICASEARR"},
|
250
|
-
# )
|
251
|
-
# print_query_results(QUERY_STRING, n_results)
|
252
|
-
# self.assertTrue(len(n_results) == 1, "Was splitting changed?")
|
253
|
-
#
|
254
|
-
# # trimmed out some characters from line 43, and still not very similar scoring
|
255
|
-
# QUERY_STRING = "IYL_EDGEBUFFER EDGEBUFFER IGN_MASK CELLSIZE"
|
256
|
-
# query = self.translator._embeddings._embeddings.embed_query(QUERY_STRING)
|
257
|
-
# n_results = vector_store.similarity_search_by_vector_with_relevance_scores(
|
258
|
-
# embedding=query,
|
259
|
-
# k=10,
|
260
|
-
# # where_document={"$contains": QUERY_STRING},
|
261
|
-
# )
|
262
|
-
# print_query_results(QUERY_STRING, n_results)
|
263
|
-
#
|
264
|
-
# # random string (as bad as XYZZY), but searching for a specific line
|
265
|
-
# QUERY_STRING = "ghost in the invisible moon"
|
266
|
-
# query = self.translator._embeddings._embeddings.embed_query(QUERY_STRING)
|
267
|
-
# n_results = vector_store.similarity_search_by_vector_with_relevance_scores(
|
268
|
-
# embedding=query,
|
269
|
-
# k=10,
|
270
|
-
# filter={"$and": [{"end_line": 90}, {"tokens": {"$gte": 21}}]},
|
271
|
-
# )
|
272
|
-
# print_query_results(QUERY_STRING, n_results)
|
273
|
-
# self.assertTrue(len(n_results) == 1, "Was splitting changed?")
|
274
|
-
|
275
|
-
# @pytest.mark.slow
|
276
|
-
# def test_document_embeddings_added_by_translate(self):
|
277
|
-
# vector_store = self.req_translator.embeddings(EmbeddingType.REQUIREMENT)
|
278
|
-
# self.assertEqual(0, vector_store._add_texts_calls, "Precondition failed")
|
279
|
-
# self.req_translator.translate(self.test_file.parent, self.test_file.parent,
|
280
|
-
# True)
|
281
|
-
# self.assertTrue(vector_store._add_texts_calls > 0, "Why no documentation?")
|
282
|
-
|
283
|
-
# @pytest.mark.slow
|
284
|
-
# def test_embed_requirements(self):
|
285
|
-
# vector_store = self.req_translator.embeddings(EmbeddingType.REQUIREMENT)
|
286
|
-
# translated = self.req_translator.translate_file(self.test_file)
|
287
|
-
# self.assertEqual(
|
288
|
-
# 0,
|
289
|
-
# vector_store._add_texts_calls,
|
290
|
-
# "Unexpected requirements added in translate_file",
|
291
|
-
# )
|
292
|
-
# result = self.req_translator._embed(
|
293
|
-
# translated, EmbeddingType.REQUIREMENT, self.test_file.name
|
294
|
-
# )
|
295
|
-
# self.assertFalse(result, "No text in root node, so should generate no docs")
|
296
|
-
# self.assertIsNotNone(translated.children[0].text, "Data changed?")
|
297
|
-
# result = self.req_translator._embed(
|
298
|
-
# translated.children[0], EmbeddingType.REQUIREMENT, self.test_file.name
|
299
|
-
# )
|
300
|
-
# self.assertTrue(result, "No docs generated for first child node?")
|
301
|
-
|
302
74
|
def test_invalid_selections(self) -> None:
|
303
75
|
"""Tests that settings values for the translator will raise exceptions"""
|
304
76
|
self.assertRaises(
|
@@ -4,8 +4,8 @@ from unittest.mock import MagicMock
|
|
4
4
|
|
5
5
|
import pytest
|
6
6
|
|
7
|
-
from
|
8
|
-
from
|
7
|
+
from janus.embedding.collections import Collections
|
8
|
+
from janus.utils.enums import EmbeddingType
|
9
9
|
|
10
10
|
|
11
11
|
class TestCollections(unittest.TestCase):
|
@@ -5,9 +5,9 @@ from unittest.mock import MagicMock
|
|
5
5
|
|
6
6
|
from chromadb.api.client import Client
|
7
7
|
|
8
|
-
from
|
9
|
-
from
|
10
|
-
from
|
8
|
+
from janus.embedding.vectorize import Vectorizer, VectorizerFactory
|
9
|
+
from janus.language.treesitter import TreeSitterSplitter
|
10
|
+
from janus.utils.enums import EmbeddingType
|
11
11
|
|
12
12
|
|
13
13
|
class MockDBVectorizer(VectorizerFactory):
|
janus/embedding/collections.py
CHANGED
@@ -5,8 +5,8 @@ from typing import Dict, Optional, Sequence
|
|
5
5
|
from chromadb import Client, Collection
|
6
6
|
from langchain_community.vectorstores import Chroma
|
7
7
|
|
8
|
-
from
|
9
|
-
from .
|
8
|
+
from janus.embedding.embedding_models_info import load_embedding_model
|
9
|
+
from janus.utils.enums import EmbeddingType
|
10
10
|
|
11
11
|
# See https://docs.trychroma.com/telemetry#in-chromas-backend-using-environment-variables
|
12
12
|
os.environ["ANONYMIZED_TELEMETRY"] = "False"
|
janus/embedding/database.py
CHANGED
@@ -8,7 +8,7 @@ from langchain_community.embeddings.huggingface import HuggingFaceInferenceAPIEm
|
|
8
8
|
from langchain_core.embeddings import Embeddings
|
9
9
|
from langchain_openai import OpenAIEmbeddings
|
10
10
|
|
11
|
-
from
|
11
|
+
from janus.utils.logger import create_logger
|
12
12
|
|
13
13
|
load_dotenv()
|
14
14
|
|
janus/embedding/vectorize.py
CHANGED
@@ -6,10 +6,10 @@ from typing import Any, Dict, Optional, Sequence
|
|
6
6
|
from chromadb import Client, Collection
|
7
7
|
from langchain_community.vectorstores import Chroma
|
8
8
|
|
9
|
-
from
|
10
|
-
from
|
11
|
-
from .
|
12
|
-
from .
|
9
|
+
from janus.embedding.collections import Collections
|
10
|
+
from janus.embedding.database import ChromaEmbeddingDatabase
|
11
|
+
from janus.language.block import CodeBlock, TranslatedCodeBlock
|
12
|
+
from janus.utils.enums import EmbeddingType
|
13
13
|
|
14
14
|
|
15
15
|
class Vectorizer(object):
|
@@ -59,7 +59,7 @@ class Vectorizer(object):
|
|
59
59
|
self,
|
60
60
|
code_block: CodeBlock,
|
61
61
|
collection_name: EmbeddingType | str,
|
62
|
-
filename: str # perhaps this should be a relative path from the source, but for
|
62
|
+
filename: str, # perhaps this should be a relative path from the source, but for
|
63
63
|
# now we're all in 1 directory
|
64
64
|
) -> None:
|
65
65
|
"""Calculate `code_block` embedding, returning success & storing in `embedding_id`
|
@@ -1,9 +1,9 @@
|
|
1
1
|
import unittest
|
2
2
|
from pathlib import Path
|
3
3
|
|
4
|
-
from
|
5
|
-
from
|
6
|
-
from
|
4
|
+
from janus.language.alc import AlcSplitter
|
5
|
+
from janus.language.combine import Combiner
|
6
|
+
from janus.llm import load_model
|
7
7
|
|
8
8
|
|
9
9
|
class TestAlcSplitter(unittest.TestCase):
|
janus/language/alc/alc.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
from langchain.schema.language_model import BaseLanguageModel
|
2
2
|
|
3
|
-
from
|
4
|
-
from
|
5
|
-
from
|
6
|
-
from
|
7
|
-
from
|
3
|
+
from janus.language.block import CodeBlock
|
4
|
+
from janus.language.combine import Combiner
|
5
|
+
from janus.language.node import NodeType
|
6
|
+
from janus.language.treesitter import TreeSitterSplitter
|
7
|
+
from janus.utils.logger import create_logger
|
8
8
|
|
9
9
|
log = create_logger(__name__)
|
10
10
|
|
@@ -5,8 +5,8 @@ from unittest.mock import patch
|
|
5
5
|
|
6
6
|
import pytest
|
7
7
|
|
8
|
-
from
|
9
|
-
from
|
8
|
+
from janus.language.binary import BinarySplitter
|
9
|
+
from janus.llm import load_model
|
10
10
|
|
11
11
|
|
12
12
|
class TestBinarySplitter(unittest.TestCase):
|
janus/language/binary/binary.py
CHANGED
@@ -7,11 +7,11 @@ from pathlib import Path
|
|
7
7
|
import tree_sitter
|
8
8
|
from langchain.schema.language_model import BaseLanguageModel
|
9
9
|
|
10
|
-
from
|
11
|
-
from
|
12
|
-
from
|
13
|
-
from
|
14
|
-
from
|
10
|
+
from janus.language.block import CodeBlock
|
11
|
+
from janus.language.combine import Combiner
|
12
|
+
from janus.language.treesitter import TreeSitterSplitter
|
13
|
+
from janus.utils.enums import LANGUAGES
|
14
|
+
from janus.utils.logger import create_logger
|
15
15
|
|
16
16
|
log = create_logger(__name__)
|
17
17
|
|
janus/language/block.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
from functools import total_ordering
|
2
2
|
from typing import ForwardRef, Hashable, Optional, Tuple
|
3
3
|
|
4
|
-
from
|
5
|
-
from .
|
4
|
+
from janus.language.node import NodeType
|
5
|
+
from janus.utils.logger import create_logger
|
6
6
|
|
7
7
|
log = create_logger(__name__)
|
8
8
|
|
janus/language/combine.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
from
|
2
|
-
from .
|
3
|
-
from .
|
1
|
+
from janus.language.block import CodeBlock, TranslatedCodeBlock
|
2
|
+
from janus.language.file import FileManager
|
3
|
+
from janus.utils.logger import create_logger
|
4
4
|
|
5
5
|
log = create_logger(__name__)
|
6
6
|
|
janus/language/file.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
import unittest
|
2
2
|
from pathlib import Path
|
3
3
|
|
4
|
-
from
|
5
|
-
from
|
6
|
-
from
|
4
|
+
from janus.language.combine import Combiner
|
5
|
+
from janus.language.mumps import MumpsSplitter
|
6
|
+
from janus.llm import load_model
|
7
7
|
|
8
8
|
|
9
9
|
class TestMumpsSplitter(unittest.TestCase):
|
janus/language/mumps/mumps.py
CHANGED
@@ -2,11 +2,11 @@ import re
|
|
2
2
|
|
3
3
|
from langchain.schema.language_model import BaseLanguageModel
|
4
4
|
|
5
|
-
from
|
6
|
-
from
|
7
|
-
from
|
8
|
-
from
|
9
|
-
from
|
5
|
+
from janus.language.block import CodeBlock
|
6
|
+
from janus.language.combine import Combiner
|
7
|
+
from janus.language.node import NodeType
|
8
|
+
from janus.language.splitter import Splitter
|
9
|
+
from janus.utils.logger import create_logger
|
10
10
|
|
11
11
|
log = create_logger(__name__)
|
12
12
|
|
janus/language/mumps/patterns.py
CHANGED
janus/language/naive/__init__.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
from .basic_splitter import FileSplitter
|
2
|
-
from .chunk_splitter import ChunkSplitter
|
3
|
-
from .simple_ast import get_flexible_ast, get_strict_ast
|
4
|
-
from .tag_splitter import TagSplitter
|
1
|
+
from janus.language.naive.basic_splitter import FileSplitter
|
2
|
+
from janus.language.naive.chunk_splitter import ChunkSplitter
|
3
|
+
from janus.language.naive.simple_ast import get_flexible_ast, get_strict_ast
|
4
|
+
from janus.language.naive.tag_splitter import TagSplitter
|
@@ -1,7 +1,7 @@
|
|
1
|
-
from
|
2
|
-
from
|
3
|
-
from
|
4
|
-
from
|
1
|
+
from janus.language.block import CodeBlock
|
2
|
+
from janus.language.naive.chunk_splitter import ChunkSplitter
|
3
|
+
from janus.language.naive.registry import register_splitter
|
4
|
+
from janus.language.splitter import FileSizeError
|
5
5
|
|
6
6
|
|
7
7
|
@register_splitter("file")
|
@@ -1,7 +1,7 @@
|
|
1
|
-
from
|
2
|
-
from
|
3
|
-
from
|
4
|
-
from .
|
1
|
+
from janus.language.block import CodeBlock
|
2
|
+
from janus.language.naive.registry import register_splitter
|
3
|
+
from janus.language.node import NodeType
|
4
|
+
from janus.language.splitter import Splitter
|
5
5
|
|
6
6
|
|
7
7
|
@register_splitter("chunk")
|
janus/language/naive/registry.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
from
|
2
|
-
from
|
3
|
-
from
|
4
|
-
from
|
5
|
-
from .
|
1
|
+
from janus.language.alc.alc import AlcSplitter
|
2
|
+
from janus.language.mumps.mumps import MumpsSplitter
|
3
|
+
from janus.language.naive.registry import register_splitter
|
4
|
+
from janus.language.treesitter import TreeSitterSplitter
|
5
|
+
from janus.utils.enums import LANGUAGES
|
6
6
|
|
7
7
|
|
8
8
|
@register_splitter("ast-flex")
|
@@ -1,7 +1,7 @@
|
|
1
|
-
from
|
2
|
-
from
|
3
|
-
from
|
4
|
-
from .
|
1
|
+
from janus.language.block import CodeBlock
|
2
|
+
from janus.language.naive.registry import register_splitter
|
3
|
+
from janus.language.node import NodeType
|
4
|
+
from janus.language.splitter import Splitter
|
5
5
|
|
6
6
|
|
7
7
|
@register_splitter("tag")
|
janus/language/node.py
CHANGED
janus/language/splitter.py
CHANGED
@@ -6,10 +6,10 @@ from typing import List
|
|
6
6
|
import tiktoken
|
7
7
|
from langchain.schema.language_model import BaseLanguageModel
|
8
8
|
|
9
|
-
from
|
10
|
-
from .
|
11
|
-
from .
|
12
|
-
from .
|
9
|
+
from janus.language.block import CodeBlock
|
10
|
+
from janus.language.file import FileManager
|
11
|
+
from janus.language.node import NodeType
|
12
|
+
from janus.utils.logger import create_logger
|
13
13
|
|
14
14
|
log = create_logger(__name__)
|
15
15
|
|