pikuri 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +62 -0
  3. data/GETTING_STARTED.md +223 -0
  4. data/LICENSE +21 -0
  5. data/README.md +193 -0
  6. data/lib/pikuri/agent/chat_transport.rb +41 -0
  7. data/lib/pikuri/agent/context_window_detector.rb +101 -0
  8. data/lib/pikuri/agent/listener/in_memory_message_list.rb +33 -0
  9. data/lib/pikuri/agent/listener/message_listener.rb +93 -0
  10. data/lib/pikuri/agent/listener/step_limit.rb +97 -0
  11. data/lib/pikuri/agent/listener/terminal.rb +137 -0
  12. data/lib/pikuri/agent/listener/token_log.rb +166 -0
  13. data/lib/pikuri/agent/listener_list.rb +113 -0
  14. data/lib/pikuri/agent/message.rb +61 -0
  15. data/lib/pikuri/agent/synthesizer.rb +120 -0
  16. data/lib/pikuri/agent/tokens.rb +56 -0
  17. data/lib/pikuri/agent.rb +286 -0
  18. data/lib/pikuri/subprocess.rb +166 -0
  19. data/lib/pikuri/tool/bash.rb +272 -0
  20. data/lib/pikuri/tool/calculator.rb +82 -0
  21. data/lib/pikuri/tool/confirmer.rb +96 -0
  22. data/lib/pikuri/tool/edit.rb +196 -0
  23. data/lib/pikuri/tool/fetch.rb +167 -0
  24. data/lib/pikuri/tool/glob.rb +310 -0
  25. data/lib/pikuri/tool/grep.rb +338 -0
  26. data/lib/pikuri/tool/parameters.rb +314 -0
  27. data/lib/pikuri/tool/read.rb +254 -0
  28. data/lib/pikuri/tool/scraper/fetch_error.rb +16 -0
  29. data/lib/pikuri/tool/scraper/html.rb +285 -0
  30. data/lib/pikuri/tool/scraper/pdf.rb +54 -0
  31. data/lib/pikuri/tool/scraper/simple.rb +177 -0
  32. data/lib/pikuri/tool/search/brave.rb +184 -0
  33. data/lib/pikuri/tool/search/duckduckgo.rb +196 -0
  34. data/lib/pikuri/tool/search/engines.rb +154 -0
  35. data/lib/pikuri/tool/search/exa.rb +217 -0
  36. data/lib/pikuri/tool/search/rate_limiter.rb +92 -0
  37. data/lib/pikuri/tool/search/result.rb +29 -0
  38. data/lib/pikuri/tool/skill.rb +80 -0
  39. data/lib/pikuri/tool/skill_catalog.rb +376 -0
  40. data/lib/pikuri/tool/sub_agent.rb +102 -0
  41. data/lib/pikuri/tool/web_scrape.rb +117 -0
  42. data/lib/pikuri/tool/web_search.rb +38 -0
  43. data/lib/pikuri/tool/workspace.rb +150 -0
  44. data/lib/pikuri/tool/write.rb +170 -0
  45. data/lib/pikuri/tool.rb +118 -0
  46. data/lib/pikuri/url_cache.rb +106 -0
  47. data/lib/pikuri/version.rb +10 -0
  48. data/lib/pikuri.rb +165 -0
  49. data/prompts/coding-system-prompt.txt +28 -0
  50. data/prompts/pikuri-chat.txt +15 -0
  51. metadata +259 -0
@@ -0,0 +1,150 @@
1
# frozen_string_literal: true

require 'pathname'

module Pikuri
  class Tool
    # Abstract boundary describing what the agent may read and write.
    # Concrete subclasses implement {#cwd}, {#resolve_for_read}, and
    # {#resolve_for_write}; every Pathname they hand back is absolute with
    # symlinks already resolved.
    #
    # == Read-set vs. write-set
    #
    # Read and write resolution are separate methods so a later workspace
    # can accept a path for reading while refusing it for writing — e.g. a
    # multi-root setup where +~/.claude/skills+ is readable but only the
    # project CWD is writable. For v1's {Cwd} both sets are the same
    # subtree.
    #
    # == Existence is not the workspace's concern
    #
    # Resolving a path that is not on disk still succeeds: reads of a
    # missing file are reported by the caller ({Tool::Read}), and writes
    # into not-yet-existing directories are permitted (the caller performs
    # any +mkdir_p+). The workspace is narrowly responsible for
    # *containment*, never for filesystem-state checks.
    #
    # == Future extensions (out of scope for v1)
    #
    # * Multi-root workspace: +~/.claude/skills+ readable, CWD writable.
    # * +Tool::Workspace::ALLOW_ALL+: no restriction, planned for Docker /
    #   dev-container mode.
    class Workspace
      # Raised whenever a path escapes the workspace. Recoverable at the
      # tool layer — tools rescue this and emit +"Error: ..."+ observations
      # so the LLM can self-correct on its next turn.
      class Error < StandardError; end

      # Project anchor: where shells run, where modifications are made.
      # For {Cwd} this is the constructor's +root:+ (after +realpath+).
      #
      # @return [Pathname]
      # @raise [NotImplementedError] in the abstract base
      def cwd
        raise NotImplementedError, "#{self.class}#cwd must be implemented"
      end

      # Map a user-supplied path into the workspace's read-set. The result
      # is absolute and may not exist on disk; callers validate existence
      # separately.
      #
      # @param path [String] user-supplied path, absolute or relative
      # @return [Pathname]
      # @raise [Error] if the resolved path falls outside the read-set
      # @raise [NotImplementedError] in the abstract base
      def resolve_for_read(path)
        raise NotImplementedError, "#{self.class}#resolve_for_read must be implemented"
      end

      # Map a user-supplied path into the workspace's write-set. Same shape
      # as {#resolve_for_read}; kept distinct so a future workspace can
      # permit reading paths it refuses to write.
      #
      # @param path [String]
      # @return [Pathname]
      # @raise [Error] if the resolved path falls outside the write-set
      # @raise [NotImplementedError] in the abstract base
      def resolve_for_write(path)
        raise NotImplementedError, "#{self.class}#resolve_for_write must be implemented"
      end

      # Locks both read and write access to a single subtree — the +root:+
      # passed at construction, with symlinks resolved. +cwd+ equals the
      # root.
      #
      # == Containment algorithm
      #
      # +#resolve+ normalizes the input (+cleanpath+), walks up to the
      # deepest ancestor that exists on disk, +realpath+'s that ancestor
      # (resolving symlinks in the existing portion), and requires the
      # canonical result to sit at or below +@root+. The cases it covers:
      #
      # 1. Existing file inside the root → returns the realpath'd file.
      # 2. Path with missing intermediates (e.g. +lib/new/dir/foo.rb+) →
      #    walk stops at the deepest existing ancestor; the intended new
      #    path is returned (caller +mkdir_p+s before writing).
      # 3. +..+ traversal (e.g. +lib/../../etc/passwd+) → +cleanpath+
      #    collapses the dots lexically; the result lands outside the root
      #    → {Error}.
      # 4. Symlink escape (+link/foo.rb+ where +link → /etc+) → the walk
      #    stops at +link+, +realpath+ follows it to +/etc+, outside the
      #    root → {Error}.
      #
      # A purely lexical check (cleanpath + prefix test) handles 1–3 but
      # misses 4; the realpath pass on the existing prefix closes that gap.
      class Cwd < Workspace
        # @param root [String, Pathname] absolute (or working-directory-
        #   relative) path anchoring the workspace; +realpath+'d once so
        #   all later comparisons happen in canonical form.
        # @raise [Errno::ENOENT] if +root+ does not exist — surfaces
        #   immediately so misconfigured callers fail loudly.
        def initialize(root:)
          @root = Pathname.new(root).realpath
        end

        # @return [Pathname] the canonicalized root
        def cwd
          @root
        end

        # @param path [String]
        # @return [Pathname]
        # @raise [Error]
        def resolve_for_read(path)
          resolve(path)
        end

        # @param path [String]
        # @return [Pathname]
        # @raise [Error]
        def resolve_for_write(path)
          resolve(path)
        end

        private

        # Normalize, anchor on the deepest existing ancestor, verify
        # containment; see the class header for the rationale per step.
        def resolve(path)
          candidate = Pathname.new(path)
          candidate = @root + candidate unless candidate.absolute?
          normalized = candidate.cleanpath

          # Walk up until we hit something that exists (or the filesystem
          # root, where parent == self terminates the loop).
          probe = normalized
          probe = probe.parent until probe.exist? || probe.parent == probe
          canonical = probe.realpath

          unless contained?(canonical)
            raise Error, "path '#{path}' is outside the workspace '#{@root}'"
          end

          canonical + normalized.relative_path_from(probe)
        end

        # True when +resolved+ is the root itself or any descendant of it.
        def contained?(resolved)
          resolved == @root || resolved.to_s.start_with?(@root.to_s + File::SEPARATOR)
        end
      end
    end
  end
end
@@ -0,0 +1,170 @@
1
# frozen_string_literal: true

require 'fileutils'

module Pikuri
  class Tool
    # The +write+ tool, expressed as a {Tool} subclass: instantiating
    # +Tool::Write.new(workspace: ws, confirmer: c)+ produces a tool whose
    # {Tool#to_ruby_llm_tool} wiring is identical to any bundled tool's,
    # so ruby_llm sees nothing special about it. Same shape as
    # {Tool::SubAgent} and {Tool::Read} — workspace and confirmer are
    # captured by the +execute+ closure at construction.
    #
    # == Policy
    #
    # Three branches based on the on-disk state of +path+:
    #
    # 1. *New file* — write, no prompt.
    # 2. *Existing file, identical content* — return an +"Error: ..."+
    #    no-op observation *before* invoking the confirmer; don't ask the
    #    user to approve a write that wouldn't change the file. Comparison
    #    is byte-strict, in BINARY encoding (trailing-newline-only
    #    differences trigger the confirm path; encoding tags can't make
    #    equal bytes compare unequal).
    # 3. *Existing file, content differs* — confirm with
    #    +"OK to overwrite <path>: <old> → <new> bytes?"+ via {Confirmer};
    #    on yes, write. On no, return a decline-Error observation.
    #
    # == Why ask-on-overwrite (Edit doesn't)
    #
    # Edit's +old_string+ argument is an implicit read-check: the model
    # can't write a correct +old_string+ without having read the file, so
    # blast radius is bounded by what the model actually knows about file
    # state. Write has no such check, so a hallucinated 500-line +content+
    # could clobber unread work. The confirmation prompt guards exactly
    # the gap Edit's argument shape already covers.
    #
    # == Side effects
    #
    # Parent directories are created (+FileUtils.mkdir_p+) before the
    # write — matches the +git add lib/new/dir/foo.rb+ mental model and
    # mirrors opencode's and pi's behavior. Edge case: +mkdir_p+ succeeds
    # but the write fails; an empty directory is left behind. Accepted
    # for v1 — users have version control.
    #
    # No atomic temp-file+rename. Plain +Pathname#binwrite+, same spirit
    # as opencode and pi. The crash-safety story is "the user has git".
    class Write < Tool
      # Description shown to the LLM. Follows the opencode-shape (summary
      # + +Usage:+ bullets) prescribed by the project's tool-description
      # convention. Per-parameter constraints live in the parameter
      # descriptions; the +Usage:+ bullets are for "when do I pick this?
      # how does it chain with other tools?".
      #
      # @return [String]
      DESCRIPTION = <<~DESC
        Write a file to the workspace, creating parent directories as needed.

        Usage:
        - Use for new files or full-file rewrites; for partial changes use `edit` instead.
        - Overwriting an existing file requires user confirmation; identical content is rejected as a no-op error — if you see that error, re-read the file rather than trying again.
        - Parent directories are created automatically (mkdir -p).
        - Writes the exact bytes supplied: no trailing-newline normalization, no encoding conversion.
        - Paths outside the workspace are refused.
      DESC

      # @param workspace [Tool::Workspace] captured for path resolution;
      #   all writes route through +workspace.resolve_for_write+.
      # @param confirmer [Tool::Confirmer] consulted before any overwrite
      #   of an existing file with non-identical content.
      # @return [Write]
      def initialize(workspace:, confirmer:)
        super(
          name: 'write',
          description: DESCRIPTION,
          parameters: Parameters.build { |p|
            p.required_string :path,
                              'Path to the file to write. Relative paths ' \
                              'resolve against the workspace root, e.g. ' \
                              '"lib/foo.rb".'
            p.required_string :content,
                              'Full contents to write to the file, e.g. ' \
                              '"class Foo\nend\n".'
          },
          execute: ->(path:, content:) {
            Write.write(workspace: workspace, confirmer: confirmer, path: path, content: content)
          }
        )
      end

      # Resolve +path+ against +workspace+, apply the three-branch policy
      # (new / identical / differs), and return either a success
      # observation or an +"Error: ..."+ observation.
      #
      # @param workspace [Tool::Workspace]
      # @param confirmer [Tool::Confirmer]
      # @param path [String] raw path as supplied by the LLM
      # @param content [String] bytes to write
      # @return [String] tool observation
      def self.write(workspace:, confirmer:, path:, content:)
        resolved = workspace.resolve_for_write(path)

        if resolved.exist?
          return "Error: #{path} is a directory" if resolved.directory?

          existing = read_for_compare(resolved, path)

          if existing == content.b
            return "Error: #{path} already contains exactly this content — " \
                   'no write needed. If you intended a change, re-read the ' \
                   'file and try again.'
          end

          prompt = "OK to overwrite #{path}: #{existing.bytesize} → #{content.bytesize} bytes? (y/n)"
          return "Error: user declined the write to #{path}." unless confirmer.confirm?(prompt: prompt)

          write_bytes(resolved, content)
          "Updated #{path} (#{existing.bytesize} → #{content.bytesize} bytes)"
        else
          write_bytes(resolved, content)
          "Created #{path} (#{content.bytesize} bytes)"
        end
      rescue Tool::Workspace::Error, Error => e
        "Error: #{e.message}"
      rescue Errno::EACCES => e
        "Error: cannot write #{path}: #{e.message}"
      end

      # Internal-only signal for LLM-actionable preconditions that Write
      # detects before any filesystem mutation — today, "existing file is
      # unreadable so the identical-content check can't run". Mirrors
      # {Tool::Workspace::Error} in shape: caught by {.write}'s outer
      # rescue and rendered as a +"Error: ..."+ observation. The class
      # is +private_constant+ to keep it from leaking out as a public
      # exception type; callers should never +rescue+ this directly.
      class Error < StandardError; end
      private_constant :Error

      # Read the existing file in BINARY mode so the equality check is
      # purely byte-wise. Raises {Error} on +Errno::EACCES+ so {.write}'s
      # outer rescue can render a specific "cannot read for overwrite
      # check" message — without this conversion the bottom +Errno::EACCES+
      # rescue would mislabel the read-side problem as "cannot write".
      #
      # @param resolved [Pathname]
      # @param path [String] original path for the error message
      # @return [String]
      # @raise [Error] when the file exists but isn't readable
      def self.read_for_compare(resolved, path)
        resolved.binread
      rescue Errno::EACCES => e
        raise Error, "cannot read #{path} for overwrite check: #{e.message}"
      end
      private_class_method :read_for_compare

      # +mkdir_p+ the parent, then write. Split out so both the create-
      # and overwrite-branches can reuse it without duplicating the
      # +mkdir_p+ call.
      #
      # +binwrite+ (not text-mode +write+) keeps DESCRIPTION's promise of
      # "the exact bytes supplied": text mode would translate newlines on
      # Windows and could apply encoding conversion, and the compare side
      # ({.read_for_compare}) already reads in BINARY mode — read and
      # write must agree or the identical-content check can misfire.
      #
      # @param resolved [Pathname]
      # @param content [String]
      # @return [void]
      def self.write_bytes(resolved, content)
        FileUtils.mkdir_p(resolved.dirname)
        resolved.binwrite(content)
      end
      private_class_method :write_bytes
    end
  end
end
@@ -0,0 +1,118 @@
1
# frozen_string_literal: true

require 'ruby_llm'

module Pikuri
  # A tool the LLM can request via OpenAI-style tool calling.
  #
  # {Tool} is a plain value object — +name+, +description+, +parameters+,
  # and an +execute+ Proc that produces the observation. All of pikuri's
  # own code (bundled tools, the sub-agent factory, the +Agent+
  # constructor) programs against this surface; conversion to ruby_llm's
  # runtime shape happens at exactly one named seam, {#to_ruby_llm_tool},
  # which the +Agent+ calls when wiring tools into the underlying
  # +RubyLLM::Chat+.
  #
  # Bundled tool implementations live under +lib/tool/+ and are required
  # explicitly by the scripts that use them; +lib/tool.rb+ itself only
  # introduces the {Tool} class and the {Tool::Parameters} machinery.
  #
  # == Validation ordering
  #
  # When the LLM emits bad arguments it must see pikuri's
  # {Parameters#validate} output (DidYouMean suggestions, type coercion,
  # all errors collected), not ruby_llm's keyword-presence checker. The
  # synthetic class built by {#to_ruby_llm_tool} declares
  # +execute(**args)+: that keyrest signature makes
  # +RubyLLM::Tool#validate_keyword_arguments+ a no-op (it bails out at
  # the +accepts_extra_keywords+ check), so pikuri's validator runs inside
  # {#run} before any user code does.
  #
  # == Error handling convention
  #
  # Failures fall into two buckets:
  #
  # * *Recoverable* — anything the LLM can react to with different inputs
  #   (bad arguments, HTTP 4xx/5xx, network blips, provider rate limits,
  #   division by zero in the calculator, ...). These come back as a
  #   +"Error: <message>"+ String and become the next observation, so the
  #   model self-corrects instead of crashing the agent loop.
  # * *Bugs* in pikuri itself (parser regressions, schema misuse,
  #   misconfigured clients, ...). These keep raising — only a human can
  #   fix them.
  #
  # {#run} applies the convention to argument-validation failures; the
  # +execute+ Proc owns the rest and should follow the same convention for
  # tool-specific recoverable failures while letting bugs raise.
  class Tool
    # @return [String] function name advertised to the LLM
    attr_reader :name

    # @return [String] human-readable description the LLM uses to decide
    #   when to call the tool
    attr_reader :description

    # @return [Tool::Parameters] declared schema; validates incoming
    #   arguments and serializes to the JSON Schema shape advertised to
    #   the LLM
    attr_reader :parameters

    # @return [Proc] callable invoked once arguments have been validated;
    #   receives validated keyword arguments and returns a +String+
    #   observation
    attr_reader :execute

    # @param name [String] function name advertised to the LLM
    # @param description [String] human-readable description the LLM uses
    #   to decide when to call the tool
    # @param parameters [Tool::Parameters] declared schema
    # @param execute [Proc] callable invoked with validated keyword
    #   arguments that returns a +String+ observation. Recoverable
    #   failures should be returned as +"Error: <message>"+ Strings rather
    #   than raised — see "Error handling convention" above.
    # @return [Tool]
    def initialize(name:, description:, parameters:, execute:)
      @name = name
      @description = description
      @parameters = parameters
      @execute = execute
    end

    # Validate +args+ against {#parameters} and forward them as keyword
    # arguments to {#execute}. Validation failures are caught and rendered
    # as +"Error: <message>"+ Strings so the agent loop can feed them back
    # to the LLM as the next observation; everything else bubbles up.
    #
    # @param args [Hash] raw arguments supplied by the LLM
    # @return [String] tool observation, or +"Error: ..."+ on validation
    #   failure
    def run(args)
      @execute.call(**@parameters.validate(args))
    rescue Tool::Parameters::ValidationError => e
      "Error: #{e.message}"
    end

    # Build a synthetic +RubyLLM::Tool+ subclass wrapping this Tool. The
    # subclass is what +RubyLLM::Chat#with_tool+ accepts: ruby_llm
    # instantiates it (+tool.new+) and routes tool calls through
    # +instance.call(args)+, which lands in +#execute(**args)+ and
    # delegates back to {#run} on this instance.
    #
    # @return [Class] anonymous +RubyLLM::Tool+ subclass
    def to_ruby_llm_tool
      owner = self
      llm_name = @name
      llm_description = @description
      llm_schema = @parameters.to_h

      Class.new(RubyLLM::Tool) do
        description(llm_description)
        params(llm_schema)

        define_singleton_method(:name) { llm_name }
        define_method(:execute) { |**args| owner.run(args) }
      end
    end
  end
end
@@ -0,0 +1,106 @@
1
# frozen_string_literal: true

require 'digest'
require 'fileutils'

module Pikuri
  # On-disk cache for string-keyed text payloads. Used by the bundled tools
  # to avoid re-fetching the same page or re-issuing the same web-search
  # query within a TTL window: {Tool::WebScrape.visit} caches the rendered
  # Markdown for a URL, and {Tool::Search::Engines.search} caches the
  # rendered result list for a query (the query string itself acts as the
  # key — keys are SHA-256 hashed, so any opaque string works).
  #
  # Each tool wires its own {UrlCache} instance against a dedicated
  # subdirectory under {ROOT_DIR}, so a +web_search+ query string and a
  # +web_scrape+ URL string can never collide on the same cache file. There
  # is no global default singleton — pass a fresh instance to whichever
  # code needs caching, or use {NULL} to disable caching entirely.
  #
  # One file per entry, named +<sha256>.txt+ under {#initialize}'s +dir+.
  # Freshness is tracked via the file's mtime; there is no sidecar metadata.
  # Stale entries are simply overwritten the next time {#fetch} is called
  # with the same key. To clear the cache, +rm -rf+ the directory.
  #
  # Not thread-safe: if two callers race on the same cold key, both compute
  # and both write the same file. That is the intended tradeoff to keep this
  # under a few dozen lines — the worst-case cost is a duplicate fetch.
  class UrlCache
    # Root directory under which per-tool cache subdirectories live.
    # Follows the XDG Base Directory spec: +$XDG_CACHE_HOME/pikuri/url_cache+
    # if the env var is set to a non-empty value, else
    # +~/.cache/pikuri/url_cache+. Each tool picks its own subdir
    # (e.g. +"#{ROOT_DIR}/web_scrape"+) so keys from different tools cannot
    # collide. The directory is created lazily on first cache write; pikuri
    # does not pre-create it.
    # @return [String]
    ROOT_DIR = begin
      xdg = ENV['XDG_CACHE_HOME']
      cache_home = xdg && !xdg.empty? ? xdg : File.join(Dir.home, '.cache')
      File.join(cache_home, 'pikuri', 'url_cache')
    end.freeze

    # Default freshness window: 2 hours, in seconds. Used as the default
    # for {#initialize}'s +ttl:+ keyword.
    #
    # Long enough to cover a single interactive session — revisiting
    # a scraped page or re-running a similar search within the same
    # working window hits the cache. Short enough that resuming the
    # next day doesn't serve stale news, docs, or search results.
    # Reference points: opencode keeps no cache, the +pi-web-fetch+
    # community extension uses 15 minutes, +pi-web-search+ uses 5;
    # 2 hours sits comfortably above the "single follow-up" window
    # those numbers are aimed at without holding content across days.
    # @return [Integer]
    DEFAULT_TTL = 2 * 60 * 60

    # @param ttl [Integer] freshness window in seconds; entries with an
    #   mtime older than this are treated as misses. Defaults to
    #   {DEFAULT_TTL} — previously the constant existed but every caller
    #   had to pass it explicitly.
    # @param dir [String] directory under which cache files live; created
    #   lazily on first write
    def initialize(ttl: DEFAULT_TTL, dir:)
      @ttl = ttl
      @dir = dir
    end

    # Return the cached payload for +url+ if a fresh entry exists, otherwise
    # yield to compute it, persist the result, and return it.
    #
    # The block is only invoked on a miss. If the block raises, no file is
    # written — errors are not cached.
    #
    # @param url [String] cache key; a URL or any opaque string identifier
    # @yieldreturn [String] payload to store and return on a miss
    # @return [String] cached or freshly-computed payload
    def fetch(url)
      path = path_for(url)
      return File.read(path) if fresh?(path)

      content = yield
      FileUtils.mkdir_p(@dir)
      File.write(path, content)
      content
    end

    # @param path [String]
    # @return [Boolean] true when +path+ exists and was written within the
    #   TTL window
    def fresh?(path)
      File.exist?(path) && Time.now - File.mtime(path) < @ttl
    end

    # @param url [String]
    # @return [String] absolute path of the cache file for +url+
    def path_for(url)
      File.join(@dir, "#{Digest::SHA256.hexdigest(url)}.txt")
    end

    # Null cache: a drop-in replacement that always misses and never
    # persists. Use this in tests (or anywhere else you want caching off)
    # without giving up the {UrlCache#fetch} contract.
    NULL = Object.new
    def NULL.fetch(_key)
      yield
    end
    NULL.freeze
  end
end
@@ -0,0 +1,10 @@
1
# frozen_string_literal: true

module Pikuri
  # Gem version, advertised in +pikuri.gemspec+. Bump on every release
  # following semver: patch for bug fixes, minor for backward-compatible
  # additions to the public surface (+Pikuri::Tool+ / +Pikuri::Agent+ /
  # listeners / bundled tools), major for breaking changes to that
  # surface or to the +bin/pikuri-*+ CLIs.
  # @return [String]
  VERSION = '0.0.1'
end
+ end