pikuri-vectordb 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -2
- data/lib/pikuri/vector_db/tools/read.rb +15 -14
- metadata +5 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: '0391399acefa74d05159cc96400db1c870bb2d0afbd7e924bf508932b9a4f6cc'
|
|
4
|
+
data.tar.gz: 81c65ff94ec867c0b1094846cda8edb4dad9d8e2e0a23180abaaca979d26b64f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6e069b736507fdca7277fdbf75ca2a9e831597095ecf0e5dfe89e781b24faf6961baa0bf567ace1cefdfdb7588314c56732e5ef45632792af117e0e57f0ac9ae
|
|
7
|
+
data.tar.gz: ed2665499e224117cdb41f5d53fab6737f132b4a38d445853f4e79db7654602458f3615c62c58f2904d58f52764dcc1055b463de1ca57bc63510799069b2a4f4
|
data/README.md
CHANGED
|
@@ -106,8 +106,10 @@ piece — each swappable via the Extension's keyword arguments:
|
|
|
106
106
|
correctness.
|
|
107
107
|
|
|
108
108
|
Text extraction reuses `Pikuri::FileType.read_as_text` from
|
|
109
|
-
pikuri-core — plain text / Markdown
|
|
110
|
-
|
|
109
|
+
pikuri-core — plain text / Markdown out of the box; for a PDF
|
|
110
|
+
corpus, register the [pikuri-pdf](../pikuri-pdf) extractor in your
|
|
111
|
+
host script (`Pikuri::Extractors::PDF.register` — `pikuri-corpus`
|
|
112
|
+
does exactly that). HTML extraction is a deferred follow-up.
|
|
111
113
|
|
|
112
114
|
## Demo: `pikuri-corpus`
|
|
113
115
|
|
data/lib/pikuri/vector_db/tools/read.rb
CHANGED
|
@@ -46,7 +46,7 @@ module Pikuri
|
|
|
46
46
|
# line-windowed by {Pikuri::FileType.read_as_text_paged} — the same
|
|
47
47
|
# windower (and the same {DEFAULT_LIMIT}-line / 50 KB caps) that
|
|
48
48
|
# backs {Pikuri::Workspace::Read}, returning a
|
|
49
|
-
# {Pikuri::
|
|
49
|
+
# {Pikuri::Extractor::Page} this tool formats. Unlike +Workspace::Read+
|
|
50
50
|
# there is no +cat -n+ line-number prefix: nothing downstream edits
|
|
51
51
|
# these documents (the citation unit is the +source+ path, not a
|
|
52
52
|
# line), and dropping the prefix saves tokens on exactly the
|
|
@@ -55,23 +55,24 @@ module Pikuri
|
|
|
55
55
|
# == Errors the LLM can react to
|
|
56
56
|
#
|
|
57
57
|
# Extraction goes through {Pikuri::FileType.read_as_text_paged},
|
|
58
|
-
# which routes the same
|
|
59
|
-
# {Pikuri::FileType.read_as_text} does — so what you
|
|
60
|
-
# what was indexed (modulo edits to the file
|
|
61
|
-
# +"--- Page N ---"+ markers
|
|
62
|
-
# file / a malformed PDF all come back as
|
|
63
|
-
# rather than raising, per CLAUDE.md
|
|
64
|
-
# failures the agent reacts to, not
|
|
58
|
+
# which routes through the same {Pikuri::Extractor} registry the
|
|
59
|
+
# {Indexer}'s {Pikuri::FileType.read_as_text} does — so what you
|
|
60
|
+
# read matches what was indexed exactly (modulo edits to the file
|
|
61
|
+
# since), +"--- Page N ---"+ PDF markers included. Images /
|
|
62
|
+
# binaries / a vanished file / a malformed PDF all come back as
|
|
63
|
+
# +"Error: ..."+ observations rather than raising, per CLAUDE.md
|
|
64
|
+
# "Errors are loud" (these are failures the agent reacts to, not
|
|
65
|
+
# pikuri bugs).
|
|
65
66
|
class Read < Pikuri::Tool
|
|
66
67
|
# @return [Integer] default value of the +limit+ parameter
|
|
67
68
|
# (number of lines returned per call). Aliases the shared
|
|
68
|
-
# {Pikuri::
|
|
69
|
-
DEFAULT_LIMIT = Pikuri::
|
|
69
|
+
# {Pikuri::Extractor::PAGE_DEFAULT_LIMIT}.
|
|
70
|
+
DEFAULT_LIMIT = Pikuri::Extractor::PAGE_DEFAULT_LIMIT
|
|
70
71
|
|
|
71
72
|
# @return [String] human-readable form of the shared byte cap
|
|
72
|
-
# ({Pikuri::
|
|
73
|
+
# ({Pikuri::Extractor::PAGE_MAX_BYTES}) for the continuation
|
|
73
74
|
# marker.
|
|
74
|
-
MAX_BYTES_LABEL = "#{Pikuri::
|
|
75
|
+
MAX_BYTES_LABEL = "#{Pikuri::Extractor::PAGE_MAX_BYTES / 1024} KB"
|
|
75
76
|
|
|
76
77
|
# @return [String] static description shown to the LLM,
|
|
77
78
|
# opencode-shape (summary + +Usage:+ bullets).
|
|
@@ -166,13 +167,13 @@ module Pikuri
|
|
|
166
167
|
end
|
|
167
168
|
private_class_method :contained?
|
|
168
169
|
|
|
169
|
-
# Format a {Pikuri::
|
|
170
|
+
# Format a {Pikuri::Extractor::Page} as the observation: the lines
|
|
170
171
|
# joined plain (no +cat -n+ prefix), followed by a trailer keyed
|
|
171
172
|
# on the citation +source+ that tells the model whether to page
|
|
172
173
|
# on. PDF pages already carry their +"--- Page N ---"+ markers
|
|
173
174
|
# from the extractor.
|
|
174
175
|
#
|
|
175
|
-
# @param page [Pikuri::
|
|
176
|
+
# @param page [Pikuri::Extractor::Page]
|
|
176
177
|
# @param source [String] the citation, used in the empty /
|
|
177
178
|
# end-of-document messages.
|
|
178
179
|
# @return [String]
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: pikuri-vectordb
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.5
|
|
4
|
+
version: 0.0.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Martin Vysny
|
|
@@ -16,28 +16,28 @@ dependencies:
|
|
|
16
16
|
requirements:
|
|
17
17
|
- - '='
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: 0.0.5
|
|
19
|
+
version: 0.0.6
|
|
20
20
|
type: :runtime
|
|
21
21
|
prerelease: false
|
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
23
|
requirements:
|
|
24
24
|
- - '='
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: 0.0.5
|
|
26
|
+
version: 0.0.6
|
|
27
27
|
- !ruby/object:Gem::Dependency
|
|
28
28
|
name: pikuri-subagents
|
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
|
30
30
|
requirements:
|
|
31
31
|
- - '='
|
|
32
32
|
- !ruby/object:Gem::Version
|
|
33
|
-
version: 0.0.5
|
|
33
|
+
version: 0.0.6
|
|
34
34
|
type: :runtime
|
|
35
35
|
prerelease: false
|
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
37
|
requirements:
|
|
38
38
|
- - '='
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
|
-
version: 0.0.5
|
|
40
|
+
version: 0.0.6
|
|
41
41
|
- !ruby/object:Gem::Dependency
|
|
42
42
|
name: listen
|
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|