janus-llm 3.2.0__py3-none-any.whl → 3.2.1__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (70)
  1. janus/__init__.py +3 -3
  2. janus/_tests/test_cli.py +3 -3
  3. janus/cli.py +1 -1
  4. janus/converter/__init__.py +6 -6
  5. janus/converter/_tests/test_translate.py +3 -231
  6. janus/embedding/_tests/test_collections.py +2 -2
  7. janus/embedding/_tests/test_database.py +1 -1
  8. janus/embedding/_tests/test_vectorize.py +3 -3
  9. janus/embedding/collections.py +2 -2
  10. janus/embedding/database.py +1 -1
  11. janus/embedding/embedding_models_info.py +1 -1
  12. janus/embedding/vectorize.py +5 -5
  13. janus/language/_tests/test_combine.py +1 -1
  14. janus/language/_tests/test_splitter.py +1 -1
  15. janus/language/alc/_tests/test_alc.py +3 -3
  16. janus/language/alc/alc.py +5 -5
  17. janus/language/binary/_tests/test_binary.py +2 -2
  18. janus/language/binary/binary.py +5 -5
  19. janus/language/block.py +2 -2
  20. janus/language/combine.py +3 -3
  21. janus/language/file.py +2 -2
  22. janus/language/mumps/_tests/test_mumps.py +3 -3
  23. janus/language/mumps/mumps.py +5 -5
  24. janus/language/mumps/patterns.py +1 -1
  25. janus/language/naive/__init__.py +4 -4
  26. janus/language/naive/basic_splitter.py +4 -4
  27. janus/language/naive/chunk_splitter.py +4 -4
  28. janus/language/naive/registry.py +1 -1
  29. janus/language/naive/simple_ast.py +5 -5
  30. janus/language/naive/tag_splitter.py +4 -4
  31. janus/language/node.py +1 -1
  32. janus/language/splitter.py +4 -4
  33. janus/language/treesitter/_tests/test_treesitter.py +3 -3
  34. janus/language/treesitter/treesitter.py +4 -4
  35. janus/llm/__init__.py +1 -1
  36. janus/llm/model_callbacks.py +1 -1
  37. janus/llm/models_info.py +3 -3
  38. janus/metrics/_tests/test_bleu.py +1 -1
  39. janus/metrics/_tests/test_chrf.py +1 -1
  40. janus/metrics/_tests/test_file_pairing.py +1 -1
  41. janus/metrics/_tests/test_llm.py +2 -2
  42. janus/metrics/_tests/test_reading.py +1 -1
  43. janus/metrics/_tests/test_rouge_score.py +1 -1
  44. janus/metrics/_tests/test_similarity_score.py +1 -1
  45. janus/metrics/_tests/test_treesitter_metrics.py +2 -2
  46. janus/metrics/bleu.py +1 -1
  47. janus/metrics/chrf.py +1 -1
  48. janus/metrics/complexity_metrics.py +4 -4
  49. janus/metrics/file_pairing.py +5 -5
  50. janus/metrics/llm_metrics.py +1 -1
  51. janus/metrics/metric.py +7 -7
  52. janus/metrics/reading.py +1 -1
  53. janus/metrics/rouge_score.py +1 -1
  54. janus/metrics/similarity.py +2 -2
  55. janus/parsers/_tests/test_code_parser.py +1 -1
  56. janus/parsers/code_parser.py +2 -2
  57. janus/parsers/doc_parser.py +3 -3
  58. janus/parsers/eval_parser.py +2 -2
  59. janus/parsers/reqs_parser.py +3 -3
  60. janus/parsers/uml.py +1 -2
  61. janus/prompts/prompt.py +2 -2
  62. janus/utils/_tests/test_logger.py +1 -1
  63. janus/utils/_tests/test_progress.py +1 -1
  64. janus/utils/progress.py +1 -1
  65. {janus_llm-3.2.0.dist-info → janus_llm-3.2.1.dist-info}/METADATA +1 -1
  66. janus_llm-3.2.1.dist-info/RECORD +105 -0
  67. janus_llm-3.2.0.dist-info/RECORD +0 -105
  68. {janus_llm-3.2.0.dist-info → janus_llm-3.2.1.dist-info}/LICENSE +0 -0
  69. {janus_llm-3.2.0.dist-info → janus_llm-3.2.1.dist-info}/WHEEL +0 -0
  70. {janus_llm-3.2.0.dist-info → janus_llm-3.2.1.dist-info}/entry_points.txt +0 -0
janus/__init__.py CHANGED
@@ -2,10 +2,10 @@ import warnings
2
2
 
3
3
  from langchain_core._api.deprecation import LangChainDeprecationWarning
4
4
 
5
- from .converter.translate import Translator
6
- from .metrics import * # noqa: F403
5
+ from janus.converter.translate import Translator
6
+ from janus.metrics import * # noqa: F403
7
7
 
8
- __version__ = "3.2.0"
8
+ __version__ = "3.2.1"
9
9
 
10
10
  # Ignoring a deprecation warning from langchain_core that I can't seem to hunt down
11
11
  warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
janus/_tests/test_cli.py CHANGED
@@ -4,9 +4,9 @@ from unittest.mock import ANY, patch
4
4
 
5
5
  from typer.testing import CliRunner
6
6
 
7
- from ..cli import app, translate
8
- from ..embedding.embedding_models_info import EMBEDDING_MODEL_CONFIG_DIR
9
- from ..llm.models_info import MODEL_CONFIG_DIR
7
+ from janus.cli import app, translate
8
+ from janus.embedding.embedding_models_info import EMBEDDING_MODEL_CONFIG_DIR
9
+ from janus.llm.models_info import MODEL_CONFIG_DIR
10
10
 
11
11
 
12
12
  class TestCli(unittest.TestCase):
janus/cli.py CHANGED
@@ -108,7 +108,7 @@ embedding = typer.Typer(
108
108
 
109
109
  def version_callback(value: bool) -> None:
110
110
  if value:
111
- from . import __version__ as version
111
+ from janus import __version__ as version
112
112
 
113
113
  print(f"Janus CLI [blue]v{version}[/blue]")
114
114
  raise typer.Exit()
janus/converter/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
- from .converter import Converter
2
- from .diagram import DiagramGenerator
3
- from .document import Documenter, MadLibsDocumenter, MultiDocumenter
4
- from .evaluate import Evaluator
5
- from .requirements import RequirementsDocumenter
6
- from .translate import Translator
1
+ from janus.converter.converter import Converter
2
+ from janus.converter.diagram import DiagramGenerator
3
+ from janus.converter.document import Documenter, MadLibsDocumenter, MultiDocumenter
4
+ from janus.converter.evaluate import Evaluator
5
+ from janus.converter.requirements import RequirementsDocumenter
6
+ from janus.converter.translate import Translator
janus/converter/_tests/test_translate.py CHANGED
@@ -7,37 +7,11 @@ from langchain.schema import Document
7
7
  from langchain.schema.embeddings import Embeddings
8
8
  from langchain.schema.vectorstore import VST, VectorStore
9
9
 
10
+ from janus.converter.diagram import DiagramGenerator
11
+ from janus.converter.requirements import RequirementsDocumenter
12
+ from janus.converter.translate import Translator
10
13
  from janus.language.block import CodeBlock, TranslatedCodeBlock
11
14
 
12
- from ..diagram import DiagramGenerator
13
- from ..requirements import RequirementsDocumenter
14
- from ..translate import Translator
15
-
16
- # from langchain.vectorstores import Chroma
17
-
18
-
19
- # from ..utils.enums import EmbeddingType
20
-
21
-
22
- def print_query_results(query, n_results):
23
- # print(f"\n{query}")
24
- # count = 1
25
- # for t in n_results:
26
- # short_code = (
27
- # (t[0].page_content[0:50] + "..")
28
- # if (len(t[0].page_content) > 50)
29
- # else t[0].page_content
30
- # )
31
- # return_index = short_code.find("\n")
32
- # if -1 != return_index:
33
- # short_code = short_code[0:return_index] + ".."
34
- # print(
35
- # f"{count}. @ {t[0].metadata['start_line']}-{t[0].metadata['end_line']}"
36
- # f" -- {t[1]} -- {short_code}"
37
- # )
38
- # count += 1
39
- pass
40
-
41
15
 
42
16
  class MockCollection(VectorStore):
43
17
  """Vector store for testing"""
@@ -65,14 +39,6 @@ class MockCollection(VectorStore):
65
39
  raise NotImplementedError("from_texts() not implemented!")
66
40
 
67
41
 
68
- # class MockEmbeddingsFactory(EmbeddingsFactory):
69
- # """Embeddings for testing - uses MockCollection"""
70
- #
71
- # def get_embeddings(self) -> Embeddings:
72
- # return MockCollection()
73
- #
74
-
75
-
76
42
  class TestTranslator(unittest.TestCase):
77
43
  """Tests for the Translator class."""
78
44
 
@@ -105,200 +71,6 @@ class TestTranslator(unittest.TestCase):
105
71
  # unit tests anyway
106
72
  self.assertTrue(python_file.exists())
107
73
 
108
- # def test_embeddings(self):
109
- # """Testing access to embeddings"""
110
- # vector_store = self.translator.embeddings(EmbeddingType.SOURCE)
111
- # self.assertIsInstance(vector_store, Chroma, "Unexpected vector store type!")
112
- # self.assertEqual(
113
- # 0, vector_store._collection.count(), "Non-empty initial vector store?"
114
- # )
115
- #
116
- # self.translator.set_model("llama")
117
- # self.translator._load_parameters()
118
- # vector_store = self.translator.embeddings(EmbeddingType.SOURCE)
119
- # self.assertIsInstance(vector_store, Chroma)
120
- # self.assertEqual(
121
- # 0, vector_store._collection.count(), "Non-empty initial vector store?"
122
- # )
123
-
124
- # def test_embed_split_source(self):
125
- # """Characterize _embed method"""
126
- # mock_embeddings = MockEmbeddingsFactory()
127
- # self.translator.set_embeddings(mock_embeddings)
128
- # self.translator._load_parameters()
129
- # input_block = self.translator.splitter.split(self.test_file)
130
- # self.assertIsNone(
131
- # input_block.text, "Root node of input text shouldn't contain text"
132
- # )
133
- # self.assertIsNone(input_block.embedding_id, "Precondition failed")
134
- #
135
- # result = self.translator._embed(
136
- # input_block, EmbeddingType.SOURCE, self.test_file.name
137
- # )
138
- #
139
- # self.assertFalse(result, "Nothing to embed, so should have no result")
140
- # self.assertIsNone(
141
- # input_block.embedding_id, "Embeddings should not have changed")
142
-
143
- # def test_embed_has_values_for_each_non_empty_node(self):
144
- # """Characterize our sample fortran file"""
145
- # mock_embeddings = MockEmbeddingsFactory()
146
- # self.translator.set_embeddings(mock_embeddings)
147
- # self.translator._load_parameters()
148
- # input_block = self.translator.splitter.split(self.test_file)
149
- # self.translator._embed_nodes_recursively(
150
- # input_block, EmbeddingType.SOURCE, self.test_file.name
151
- # )
152
- # has_text_count = 0
153
- # has_embeddings_count = 0
154
- # nodes = [input_block]
155
- # while nodes:
156
- # node = nodes.pop(0)
157
- # if node.text:
158
- # has_text_count += 1
159
- # if node.embedding_id:
160
- # has_embeddings_count += 1
161
- # nodes.extend(node.children)
162
- # self.assertEqual(
163
- # self.TEST_FILE_EMBEDDING_COUNT,
164
- # has_text_count,
165
- # "Parsing of test_file has changed!",
166
- # )
167
- # self.assertEqual(
168
- # self.TEST_FILE_EMBEDDING_COUNT,
169
- # has_embeddings_count,
170
- # "Not all non-empty nodes have embeddings!",
171
- # )
172
-
173
- # def test_embed_nodes_recursively(self):
174
- # mock_embeddings = MockEmbeddingsFactory()
175
- # self.translator.set_embeddings(mock_embeddings)
176
- # self.translator._load_parameters()
177
- # input_block = self.translator.splitter.split(self.test_file)
178
- # self.translator._embed_nodes_recursively(
179
- # input_block, EmbeddingType.SOURCE, self.test_file.name
180
- # )
181
- # nodes = [input_block]
182
- # while nodes:
183
- # node = nodes.pop(0)
184
- # self.assertEqual(node.text is not None, node.embedding_id is not None)
185
- # nodes.extend(node.children)
186
-
187
- # @pytest.mark.slow
188
- # def test_translate_file_adds_source_embeddings(self):
189
- # mock_embeddings = MockEmbeddingsFactory()
190
- # self.translator.set_embeddings(mock_embeddings)
191
- # self.translator._load_parameters()
192
- # vector_store = self.translator.embeddings(EmbeddingType.SOURCE)
193
- # self.assertEqual(0, vector_store._add_texts_calls, "precondition")
194
- #
195
- # self.translator.translate_file(self.test_file)
196
- #
197
- # self.assertEqual(
198
- # self.TEST_FILE_EMBEDDING_COUNT,
199
- # vector_store._add_texts_calls,
200
- # "Did not find expected source embeddings",
201
- # )
202
-
203
- # @pytest.mark.slow
204
- # def test_embeddings_usage(self):
205
- # """Noodling on use of embeddings
206
- # To see results have to uncomment print_query_results() above
207
- # """
208
- # input_block = self.translator.splitter.split(self.test_file)
209
- # self.translator._embed_nodes_recursively(
210
- # input_block, EmbeddingType.SOURCE, self.test_file.name
211
- # )
212
- # vector_store = self.translator.embeddings(EmbeddingType.SOURCE)
213
- #
214
- # # this symbol has the lowest relevance scores of any in this test, but
215
- # # still not very low; multiple embedded nodes contain it
216
- # QUERY_STRING = "IWX_BAND_START"
217
- # query = self.translator._embeddings._embeddings.embed_query(QUERY_STRING)
218
- # n_results = vector_store.similarity_search_by_vector_with_relevance_scores(
219
- # embedding=query,
220
- # k=10,
221
- # where_document={"$contains": QUERY_STRING},
222
- # )
223
- # self.assertTrue(len(n_results) > 1, "Why was valid symbol not found?")
224
- # print_query_results(QUERY_STRING, n_results)
225
-
226
- # in the XYZZY test, the least dissimilar results were the start and finish lines
227
- # 0, and 415, which produced a similarity score of 0.47:
228
-
229
- # QUERY_STRING = "XYZZY"
230
- # query = self.translator._embeddings.embed_query(QUERY_STRING)
231
- # n_results = vector_store.similarity_search_by_vector_with_relevance_scores(
232
- # embedding=query,
233
- # k=10,
234
- # # filter={"end_line": 15},
235
- # # filter={"$and": [{"end_line": 15}, {"tokens": {"$gte": 21}}]},
236
- # # where_document={"$contains": QUERY_STRING},
237
- # )
238
- # print_query_results(QUERY_STRING, n_results)
239
- # # self.assertTrue(len(n_results) == 0, "Invalid symbol was found?")
240
-
241
- # # only returns a single result because only 1 embedded node contains
242
- # # CSV_ICASEARR:
243
- # QUERY_STRING = "What is the use of CSV_ICASEARR?"
244
- # query = self.translator._embeddings._embeddings.embed_query(QUERY_STRING)
245
- # n_results = vector_store.similarity_search_by_vector_with_relevance_scores(
246
- # embedding=query,
247
- # k=10,
248
- # # where_document={"$contains": QUERY_STRING},
249
- # where_document={"$contains": "CSV_ICASEARR"},
250
- # )
251
- # print_query_results(QUERY_STRING, n_results)
252
- # self.assertTrue(len(n_results) == 1, "Was splitting changed?")
253
- #
254
- # # trimmed out some characters from line 43, and still not very similar scoring
255
- # QUERY_STRING = "IYL_EDGEBUFFER EDGEBUFFER IGN_MASK CELLSIZE"
256
- # query = self.translator._embeddings._embeddings.embed_query(QUERY_STRING)
257
- # n_results = vector_store.similarity_search_by_vector_with_relevance_scores(
258
- # embedding=query,
259
- # k=10,
260
- # # where_document={"$contains": QUERY_STRING},
261
- # )
262
- # print_query_results(QUERY_STRING, n_results)
263
- #
264
- # # random string (as bad as XYZZY), but searching for a specific line
265
- # QUERY_STRING = "ghost in the invisible moon"
266
- # query = self.translator._embeddings._embeddings.embed_query(QUERY_STRING)
267
- # n_results = vector_store.similarity_search_by_vector_with_relevance_scores(
268
- # embedding=query,
269
- # k=10,
270
- # filter={"$and": [{"end_line": 90}, {"tokens": {"$gte": 21}}]},
271
- # )
272
- # print_query_results(QUERY_STRING, n_results)
273
- # self.assertTrue(len(n_results) == 1, "Was splitting changed?")
274
-
275
- # @pytest.mark.slow
276
- # def test_document_embeddings_added_by_translate(self):
277
- # vector_store = self.req_translator.embeddings(EmbeddingType.REQUIREMENT)
278
- # self.assertEqual(0, vector_store._add_texts_calls, "Precondition failed")
279
- # self.req_translator.translate(self.test_file.parent, self.test_file.parent,
280
- # True)
281
- # self.assertTrue(vector_store._add_texts_calls > 0, "Why no documentation?")
282
-
283
- # @pytest.mark.slow
284
- # def test_embed_requirements(self):
285
- # vector_store = self.req_translator.embeddings(EmbeddingType.REQUIREMENT)
286
- # translated = self.req_translator.translate_file(self.test_file)
287
- # self.assertEqual(
288
- # 0,
289
- # vector_store._add_texts_calls,
290
- # "Unexpected requirements added in translate_file",
291
- # )
292
- # result = self.req_translator._embed(
293
- # translated, EmbeddingType.REQUIREMENT, self.test_file.name
294
- # )
295
- # self.assertFalse(result, "No text in root node, so should generate no docs")
296
- # self.assertIsNotNone(translated.children[0].text, "Data changed?")
297
- # result = self.req_translator._embed(
298
- # translated.children[0], EmbeddingType.REQUIREMENT, self.test_file.name
299
- # )
300
- # self.assertTrue(result, "No docs generated for first child node?")
301
-
302
74
  def test_invalid_selections(self) -> None:
303
75
  """Tests that settings values for the translator will raise exceptions"""
304
76
  self.assertRaises(
janus/embedding/_tests/test_collections.py CHANGED
@@ -4,8 +4,8 @@ from unittest.mock import MagicMock
4
4
 
5
5
  import pytest
6
6
 
7
- from ...utils.enums import EmbeddingType
8
- from ..collections import Collections
7
+ from janus.embedding.collections import Collections
8
+ from janus.utils.enums import EmbeddingType
9
9
 
10
10
 
11
11
  class TestCollections(unittest.TestCase):
janus/embedding/_tests/test_database.py CHANGED
@@ -2,7 +2,7 @@ import unittest
2
2
  from pathlib import Path
3
3
  from unittest.mock import patch
4
4
 
5
- from ..database import ChromaEmbeddingDatabase, uri_to_path
5
+ from janus.embedding.database import ChromaEmbeddingDatabase, uri_to_path
6
6
 
7
7
 
8
8
  class TestDatabase(unittest.TestCase):
janus/embedding/_tests/test_vectorize.py CHANGED
@@ -5,9 +5,9 @@ from unittest.mock import MagicMock
5
5
 
6
6
  from chromadb.api.client import Client
7
7
 
8
- from ...language.treesitter import TreeSitterSplitter
9
- from ...utils.enums import EmbeddingType
10
- from ..vectorize import Vectorizer, VectorizerFactory
8
+ from janus.embedding.vectorize import Vectorizer, VectorizerFactory
9
+ from janus.language.treesitter import TreeSitterSplitter
10
+ from janus.utils.enums import EmbeddingType
11
11
 
12
12
 
13
13
  class MockDBVectorizer(VectorizerFactory):
janus/embedding/collections.py CHANGED
@@ -5,8 +5,8 @@ from typing import Dict, Optional, Sequence
5
5
  from chromadb import Client, Collection
6
6
  from langchain_community.vectorstores import Chroma
7
7
 
8
- from ..utils.enums import EmbeddingType
9
- from .embedding_models_info import load_embedding_model
8
+ from janus.embedding.embedding_models_info import load_embedding_model
9
+ from janus.utils.enums import EmbeddingType
10
10
 
11
11
  # See https://docs.trychroma.com/telemetry#in-chromas-backend-using-environment-variables
12
12
  os.environ["ANONYMIZED_TELEMETRY"] = "False"
janus/embedding/database.py CHANGED
@@ -5,7 +5,7 @@ from urllib.request import url2pathname
5
5
 
6
6
  import chromadb
7
7
 
8
- from ..utils.logger import create_logger
8
+ from janus.utils.logger import create_logger
9
9
 
10
10
  log = create_logger(__name__)
11
11
 
janus/embedding/embedding_models_info.py CHANGED
@@ -8,7 +8,7 @@ from langchain_community.embeddings.huggingface import HuggingFaceInferenceAPIEm
8
8
  from langchain_core.embeddings import Embeddings
9
9
  from langchain_openai import OpenAIEmbeddings
10
10
 
11
- from ..utils.logger import create_logger
11
+ from janus.utils.logger import create_logger
12
12
 
13
13
  load_dotenv()
14
14
 
janus/embedding/vectorize.py CHANGED
@@ -6,10 +6,10 @@ from typing import Any, Dict, Optional, Sequence
6
6
  from chromadb import Client, Collection
7
7
  from langchain_community.vectorstores import Chroma
8
8
 
9
- from ..language.block import CodeBlock, TranslatedCodeBlock
10
- from ..utils.enums import EmbeddingType
11
- from .collections import Collections
12
- from .database import ChromaEmbeddingDatabase
9
+ from janus.embedding.collections import Collections
10
+ from janus.embedding.database import ChromaEmbeddingDatabase
11
+ from janus.language.block import CodeBlock, TranslatedCodeBlock
12
+ from janus.utils.enums import EmbeddingType
13
13
 
14
14
 
15
15
  class Vectorizer(object):
@@ -59,7 +59,7 @@ class Vectorizer(object):
59
59
  self,
60
60
  code_block: CodeBlock,
61
61
  collection_name: EmbeddingType | str,
62
- filename: str # perhaps this should be a relative path from the source, but for
62
+ filename: str, # perhaps this should be a relative path from the source, but for
63
63
  # now we're all in 1 directory
64
64
  ) -> None:
65
65
  """Calculate `code_block` embedding, returning success & storing in `embedding_id`
janus/language/_tests/test_combine.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import unittest
2
2
 
3
- from ..combine import CodeBlock, Combiner, TranslatedCodeBlock
3
+ from janus.language.combine import CodeBlock, Combiner, TranslatedCodeBlock
4
4
 
5
5
 
6
6
  class TestCombiner(unittest.TestCase):
janus/language/_tests/test_splitter.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import unittest
2
2
 
3
- from ..splitter import Splitter
3
+ from janus.language.splitter import Splitter
4
4
 
5
5
 
6
6
  class TestSplitter(unittest.TestCase):
janus/language/alc/_tests/test_alc.py CHANGED
@@ -1,9 +1,9 @@
1
1
  import unittest
2
2
  from pathlib import Path
3
3
 
4
- from ....llm import load_model
5
- from ...combine import Combiner
6
- from ..alc import AlcSplitter
4
+ from janus.language.alc import AlcSplitter
5
+ from janus.language.combine import Combiner
6
+ from janus.llm import load_model
7
7
 
8
8
 
9
9
  class TestAlcSplitter(unittest.TestCase):
janus/language/alc/alc.py CHANGED
@@ -1,10 +1,10 @@
1
1
  from langchain.schema.language_model import BaseLanguageModel
2
2
 
3
- from ...utils.logger import create_logger
4
- from ..block import CodeBlock
5
- from ..combine import Combiner
6
- from ..node import NodeType
7
- from ..treesitter import TreeSitterSplitter
3
+ from janus.language.block import CodeBlock
4
+ from janus.language.combine import Combiner
5
+ from janus.language.node import NodeType
6
+ from janus.language.treesitter import TreeSitterSplitter
7
+ from janus.utils.logger import create_logger
8
8
 
9
9
  log = create_logger(__name__)
10
10
 
janus/language/binary/_tests/test_binary.py CHANGED
@@ -5,8 +5,8 @@ from unittest.mock import patch
5
5
 
6
6
  import pytest
7
7
 
8
- from ....llm import load_model
9
- from ..binary import BinarySplitter
8
+ from janus.language.binary import BinarySplitter
9
+ from janus.llm import load_model
10
10
 
11
11
 
12
12
  class TestBinarySplitter(unittest.TestCase):
janus/language/binary/binary.py CHANGED
@@ -7,11 +7,11 @@ from pathlib import Path
7
7
  import tree_sitter
8
8
  from langchain.schema.language_model import BaseLanguageModel
9
9
 
10
- from ...utils.enums import LANGUAGES
11
- from ...utils.logger import create_logger
12
- from ..block import CodeBlock
13
- from ..combine import Combiner
14
- from ..treesitter import TreeSitterSplitter
10
+ from janus.language.block import CodeBlock
11
+ from janus.language.combine import Combiner
12
+ from janus.language.treesitter import TreeSitterSplitter
13
+ from janus.utils.enums import LANGUAGES
14
+ from janus.utils.logger import create_logger
15
15
 
16
16
  log = create_logger(__name__)
17
17
 
janus/language/block.py CHANGED
@@ -1,8 +1,8 @@
1
1
  from functools import total_ordering
2
2
  from typing import ForwardRef, Hashable, Optional, Tuple
3
3
 
4
- from ..utils.logger import create_logger
5
- from .node import NodeType
4
+ from janus.language.node import NodeType
5
+ from janus.utils.logger import create_logger
6
6
 
7
7
  log = create_logger(__name__)
8
8
 
janus/language/combine.py CHANGED
@@ -1,6 +1,6 @@
1
- from ..utils.logger import create_logger
2
- from .block import CodeBlock, TranslatedCodeBlock
3
- from .file import FileManager
1
+ from janus.language.block import CodeBlock, TranslatedCodeBlock
2
+ from janus.language.file import FileManager
3
+ from janus.utils.logger import create_logger
4
4
 
5
5
  log = create_logger(__name__)
6
6
 
janus/language/file.py CHANGED
@@ -1,5 +1,5 @@
1
- from ..utils.enums import LANGUAGES
2
- from ..utils.logger import create_logger
1
+ from janus.utils.enums import LANGUAGES
2
+ from janus.utils.logger import create_logger
3
3
 
4
4
  log = create_logger(__name__)
5
5
 
janus/language/mumps/_tests/test_mumps.py CHANGED
@@ -1,9 +1,9 @@
1
1
  import unittest
2
2
  from pathlib import Path
3
3
 
4
- from ....llm import load_model
5
- from ...combine import Combiner
6
- from ..mumps import MumpsSplitter
4
+ from janus.language.combine import Combiner
5
+ from janus.language.mumps import MumpsSplitter
6
+ from janus.llm import load_model
7
7
 
8
8
 
9
9
  class TestMumpsSplitter(unittest.TestCase):
janus/language/mumps/mumps.py CHANGED
@@ -2,11 +2,11 @@ import re
2
2
 
3
3
  from langchain.schema.language_model import BaseLanguageModel
4
4
 
5
- from ...utils.logger import create_logger
6
- from ..block import CodeBlock
7
- from ..combine import Combiner
8
- from ..node import NodeType
9
- from ..splitter import Splitter
5
+ from janus.language.block import CodeBlock
6
+ from janus.language.combine import Combiner
7
+ from janus.language.node import NodeType
8
+ from janus.language.splitter import Splitter
9
+ from janus.utils.logger import create_logger
10
10
 
11
11
  log = create_logger(__name__)
12
12
 
janus/language/mumps/patterns.py CHANGED
@@ -1,7 +1,7 @@
1
1
  import re
2
2
  from dataclasses import dataclass
3
3
 
4
- from ...utils.logger import create_logger
4
+ from janus.utils.logger import create_logger
5
5
 
6
6
  log = create_logger(__name__)
7
7
 
janus/language/naive/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- from .basic_splitter import FileSplitter
2
- from .chunk_splitter import ChunkSplitter
3
- from .simple_ast import get_flexible_ast, get_strict_ast
4
- from .tag_splitter import TagSplitter
1
+ from janus.language.naive.basic_splitter import FileSplitter
2
+ from janus.language.naive.chunk_splitter import ChunkSplitter
3
+ from janus.language.naive.simple_ast import get_flexible_ast, get_strict_ast
4
+ from janus.language.naive.tag_splitter import TagSplitter
janus/language/naive/basic_splitter.py CHANGED
@@ -1,7 +1,7 @@
1
- from ..block import CodeBlock
2
- from ..naive.chunk_splitter import ChunkSplitter
3
- from ..naive.registry import register_splitter
4
- from ..splitter import FileSizeError
1
+ from janus.language.block import CodeBlock
2
+ from janus.language.naive.chunk_splitter import ChunkSplitter
3
+ from janus.language.naive.registry import register_splitter
4
+ from janus.language.splitter import FileSizeError
5
5
 
6
6
 
7
7
  @register_splitter("file")
janus/language/naive/chunk_splitter.py CHANGED
@@ -1,7 +1,7 @@
1
- from ..block import CodeBlock
2
- from ..node import NodeType
3
- from ..splitter import Splitter
4
- from .registry import register_splitter
1
+ from janus.language.block import CodeBlock
2
+ from janus.language.naive.registry import register_splitter
3
+ from janus.language.node import NodeType
4
+ from janus.language.splitter import Splitter
5
5
 
6
6
 
7
7
  @register_splitter("chunk")
janus/language/naive/registry.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from typing import Callable, Dict
2
2
 
3
- from ..splitter import Splitter
3
+ from janus.language.splitter import Splitter
4
4
 
5
5
  CUSTOM_SPLITTERS: Dict[str, Callable[..., Splitter]] = dict()
6
6
 
janus/language/naive/simple_ast.py CHANGED
@@ -1,8 +1,8 @@
1
- from ...utils.enums import LANGUAGES
2
- from ..alc.alc import AlcSplitter
3
- from ..mumps.mumps import MumpsSplitter
4
- from ..treesitter import TreeSitterSplitter
5
- from .registry import register_splitter
1
+ from janus.language.alc.alc import AlcSplitter
2
+ from janus.language.mumps.mumps import MumpsSplitter
3
+ from janus.language.naive.registry import register_splitter
4
+ from janus.language.treesitter import TreeSitterSplitter
5
+ from janus.utils.enums import LANGUAGES
6
6
 
7
7
 
8
8
  @register_splitter("ast-flex")
janus/language/naive/tag_splitter.py CHANGED
@@ -1,7 +1,7 @@
1
- from ..block import CodeBlock
2
- from ..node import NodeType
3
- from ..splitter import Splitter
4
- from .registry import register_splitter
1
+ from janus.language.block import CodeBlock
2
+ from janus.language.naive.registry import register_splitter
3
+ from janus.language.node import NodeType
4
+ from janus.language.splitter import Splitter
5
5
 
6
6
 
7
7
  @register_splitter("tag")
janus/language/node.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from typing import NewType, Tuple
2
2
 
3
- from ..utils.logger import create_logger
3
+ from janus.utils.logger import create_logger
4
4
 
5
5
  log = create_logger(__name__)
6
6
 
janus/language/splitter.py CHANGED
@@ -6,10 +6,10 @@ from typing import List
6
6
  import tiktoken
7
7
  from langchain.schema.language_model import BaseLanguageModel
8
8
 
9
- from ..utils.logger import create_logger
10
- from .block import CodeBlock
11
- from .file import FileManager
12
- from .node import NodeType
9
+ from janus.language.block import CodeBlock
10
+ from janus.language.file import FileManager
11
+ from janus.language.node import NodeType
12
+ from janus.utils.logger import create_logger
13
13
 
14
14
  log = create_logger(__name__)
15
15