semantic-chunker-langchain 0.1.2__tar.gz → 0.1.3__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: semantic-chunker-langchain
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: Token-aware, LangChain-compatible semantic chunker with PDF and layout support
5
5
  License: MIT
6
6
  Author: Prajwal Shivaji Mandale
@@ -42,7 +42,7 @@ A **token-aware**, **LangChain-compatible** chunker that splits text (from PDF,
42
42
 
43
43
  ---
44
44
 
45
- ## 📦 Installation
45
+ ## 📆 Installation
46
46
 
47
47
  ```bash
48
48
  pip install semantic-chunker-langchain
@@ -62,6 +62,7 @@ semantic-chunker sample.pdf --txt chunks.txt --json chunks.json
62
62
 
63
63
  ### 🔸 From Code
64
64
 
65
+ ```python
65
66
  from semantic_chunker_langchain.chunker import SemanticChunker, SimpleSemanticChunker
66
67
  from semantic_chunker_langchain.extractors.pdf import extract_pdf
67
68
  from semantic_chunker_langchain.outputs.formatter import write_to_txt
@@ -79,7 +80,7 @@ write_to_txt(chunks, "output.txt")
79
80
  # Using SimpleSemanticChunker
80
81
  simple_chunker = SimpleSemanticChunker(model_name="gpt-3.5-turbo")
81
82
  simple_chunks = simple_chunker.split_documents(docs)
82
-
83
+ ```
83
84
 
84
85
  ### 🔸 Convert to Retriever
85
86
 
@@ -90,7 +91,7 @@ retriever = chunker.to_retriever(chunks, embedding=OpenAIEmbeddings())
90
91
 
91
92
  ---
92
93
 
93
- ## 🧪 Testing
94
+ ## 📊 Testing
94
95
 
95
96
  ```bash
96
97
  poetry run pytest tests/
@@ -20,7 +20,7 @@ A **token-aware**, **LangChain-compatible** chunker that splits text (from PDF,
20
20
 
21
21
  ---
22
22
 
23
- ## 📦 Installation
23
+ ## 📆 Installation
24
24
 
25
25
  ```bash
26
26
  pip install semantic-chunker-langchain
@@ -40,6 +40,7 @@ semantic-chunker sample.pdf --txt chunks.txt --json chunks.json
40
40
 
41
41
  ### 🔸 From Code
42
42
 
43
+ ```python
43
44
  from semantic_chunker_langchain.chunker import SemanticChunker, SimpleSemanticChunker
44
45
  from semantic_chunker_langchain.extractors.pdf import extract_pdf
45
46
  from semantic_chunker_langchain.outputs.formatter import write_to_txt
@@ -57,7 +58,7 @@ write_to_txt(chunks, "output.txt")
57
58
  # Using SimpleSemanticChunker
58
59
  simple_chunker = SimpleSemanticChunker(model_name="gpt-3.5-turbo")
59
60
  simple_chunks = simple_chunker.split_documents(docs)
60
-
61
+ ```
61
62
 
62
63
  ### 🔸 Convert to Retriever
63
64
 
@@ -68,7 +69,7 @@ retriever = chunker.to_retriever(chunks, embedding=OpenAIEmbeddings())
68
69
 
69
70
  ---
70
71
 
71
- ## 🧪 Testing
72
+ ## 📊 Testing
72
73
 
73
74
  ```bash
74
75
  poetry run pytest tests/
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "semantic-chunker-langchain"
3
- version = "0.1.2"
3
+ version = "0.1.3"
4
4
  description = "Token-aware, LangChain-compatible semantic chunker with PDF and layout support"
5
5
  authors = ["Prajwal Shivaji Mandale <prajwal.mandale333@gmail.com>","Sudhnwa Ghorpade <sudhnwa.ghorpade@gmail.com>"]
6
6
  license = "MIT"