uptriever 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/lib/uptriever/chunker.rb +1 -1
- data/lib/uptriever/client.rb +40 -8
- data/lib/uptriever/config.rb +2 -2
- data/lib/uptriever/document.rb +7 -3
- data/lib/uptriever/version.rb +1 -1
- metadata +19 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: af61ca4ef13ebf666b7637be03e243fa891ba69052bb34bdd2424b612c6a5a42
|
4
|
+
data.tar.gz: f418f2b390df6e8bde1dfc34eafd2881174709272644c7b7a26f83f09d598e3f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b07feade83badf77f8177e3f222780348427f33d3dd37f00d6935b32b97d0c253711fdb3c2c1a4112bef535b24a8fb925c500e0d4e684b884484a4f1a8c1f74c
|
7
|
+
data.tar.gz: ee8faddee9277a2c407b8b2f681b63fcc58a9a9f004a9642e6e1b890e3cd3dfc69a7f3c04498359e6b23b9f51fb4588818db2e28904c816b244d6368a1ea9362
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,16 @@
|
|
2
2
|
|
3
3
|
## master
|
4
4
|
|
5
|
+
## 0.2.0 (2025-08-05)
|
6
|
+
|
7
|
+
- Ignore frontmatter. ([@palkan][])
|
8
|
+
|
9
|
+
- Support `.mdx` ([@palkan][])
|
10
|
+
|
11
|
+
## 0.1.2 (2025-01-03)
|
12
|
+
|
13
|
+
- Fix the bug with links including PWD. ([@palkan][])
|
14
|
+
|
5
15
|
## 0.1.1 (2024-07-25)
|
6
16
|
|
7
17
|
- Add ERB support to allow include dynamic fragments to configs. ([@palkan][])
|
data/lib/uptriever/chunker.rb
CHANGED
data/lib/uptriever/client.rb
CHANGED
@@ -8,9 +8,10 @@ module Uptriever
|
|
8
8
|
BASE_URL = "https://api.trieve.ai/api"
|
9
9
|
|
10
10
|
attr_reader :headers
|
11
|
-
private attr_reader :dry_run
|
11
|
+
private attr_reader :dry_run, :dataset_id
|
12
12
|
|
13
|
-
def initialize(api_key, dataset, dry_run: false)
|
13
|
+
def initialize(api_key = ENV["TRIEVE_API_KEY"], dataset = ENV["TRIEVE_DATASET"], dry_run: false)
|
14
|
+
@dataset_id = dataset
|
14
15
|
@dry_run = dry_run
|
15
16
|
@headers = {
|
16
17
|
"Authorization" => api_key,
|
@@ -25,22 +26,53 @@ module Uptriever
|
|
25
26
|
|
26
27
|
def push_chunk(chunk, upsert: true)
|
27
28
|
chunk[:upsert_by_tracking_id] = upsert
|
28
|
-
perform_request("/chunk", chunk.to_json)
|
29
|
+
perform_request("/chunk", chunk.to_json).inspect
|
30
|
+
end
|
31
|
+
|
32
|
+
def scroll_chunks(per_page: 100)
|
33
|
+
data = {
|
34
|
+
filters: {must: nil},
|
35
|
+
page_size: per_page
|
36
|
+
}
|
37
|
+
|
38
|
+
offset_id = nil
|
39
|
+
|
40
|
+
loop do
|
41
|
+
data[:offset_chunk_id] = offset_id if offset_id
|
42
|
+
data = perform_request("/chunks/scroll", data.to_json)
|
43
|
+
|
44
|
+
chunks = data.fetch("chunks")
|
45
|
+
chunks = chunks.select { _1["id"] != offset_id } if offset_id
|
46
|
+
|
47
|
+
break if chunks.empty?
|
48
|
+
|
49
|
+
chunks.each { yield _1 }
|
50
|
+
|
51
|
+
offset_id = chunks.last["id"]
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
def delete_chunk(id)
|
56
|
+
perform_request("/chunk/#{id}", method: :delete, expected_code: 204)
|
57
|
+
end
|
58
|
+
|
59
|
+
def usage
|
60
|
+
perform_request("/dataset/usage/#{dataset_id}", method: :get)
|
29
61
|
end
|
30
62
|
|
31
63
|
private
|
32
64
|
|
33
|
-
def perform_request(path, data)
|
65
|
+
def perform_request(path, data = nil, method: :post, expected_code: 200)
|
34
66
|
uri = URI.parse(BASE_URL + path)
|
35
67
|
|
36
68
|
http = Net::HTTP.new(uri.host, uri.port)
|
37
69
|
http.use_ssl = true if uri.scheme == "https"
|
38
70
|
|
39
|
-
request = Net::HTTP::Post.new(
|
71
|
+
request = Net::HTTP.const_get(method.to_s.capitalize).new(
|
40
72
|
uri.request_uri,
|
41
73
|
headers.merge("Content-Type" => "application/json")
|
42
74
|
)
|
43
|
-
request.body = data
|
75
|
+
request.body = data if data
|
44
76
|
|
45
77
|
if dry_run
|
46
78
|
puts "[DRY RUN] Perform POST #{path}: #{data}"
|
@@ -49,11 +81,11 @@ module Uptriever
|
|
49
81
|
|
50
82
|
response = http.request(request)
|
51
83
|
|
52
|
-
if response.code.to_i != 200
|
84
|
+
if response.code.to_i != expected_code
|
53
85
|
raise "Invalid response code: #{response.code} (#{response.body[100...]})"
|
54
86
|
end
|
55
87
|
|
56
|
-
JSON.parse(response.body)
|
88
|
+
JSON.parse(response.body) if response.body
|
57
89
|
end
|
58
90
|
end
|
59
91
|
end
|
data/lib/uptriever/config.rb
CHANGED
@@ -10,7 +10,7 @@ module Uptriever
|
|
10
10
|
attr_reader :config_path, :root_dir
|
11
11
|
|
12
12
|
def initialize(root_dir)
|
13
|
-
@root_dir = root_dir
|
13
|
+
@root_dir = File.expand_path(root_dir)
|
14
14
|
@config_path = File.join(root_dir, ".trieve.yml")
|
15
15
|
raise ArgumentError, ".trieve.yml is missing in the #{root_dir}" unless File.file?(config_path)
|
16
16
|
end
|
@@ -32,7 +32,7 @@ module Uptriever
|
|
32
32
|
File.join(config["url_prefix"], _1)
|
33
33
|
end
|
34
34
|
|
35
|
-
link = page["link"] || File.join(config.fetch("hostname"), relative_link)
|
35
|
+
link = (page["link"] || File.join(config.fetch("hostname"), relative_link)).gsub(/([^:])\/{2,}/, '\1/')
|
36
36
|
id = page["id"] || relative_link.sub(/^\//, "").gsub(/[\/-]/, "-")
|
37
37
|
|
38
38
|
Document.new(id, page["source"], link, **defaults.merge({groups: page["groups"], tags: page["tags"], weight: page["weight"]}.compact))
|
data/lib/uptriever/document.rb
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "redcarpet"
|
4
|
+
require "front_matter_parser"
|
5
|
+
require "front_matter_parser/syntax_parser"
|
6
|
+
FrontMatterParser::SyntaxParser::Mdx = FrontMatterParser::SyntaxParser::Md
|
4
7
|
|
5
8
|
module Uptriever
|
6
9
|
class Document
|
@@ -17,9 +20,10 @@ module Uptriever
|
|
17
20
|
|
18
21
|
def to_html
|
19
22
|
case File.extname(path)
|
20
|
-
when ".md"
|
23
|
+
when ".md", ".mdx"
|
24
|
+
parsed = FrontMatterParser::Parser.parse_file(path)
|
21
25
|
markdown = Redcarpet::Markdown.new(Redcarpet::Render::HTML, autolink: true, tables: true)
|
22
|
-
markdown.render(File.read(path))
|
26
|
+
markdown.render(parsed.content)
|
23
27
|
when ".html"
|
24
28
|
File.read(path)
|
25
29
|
else
|
@@ -29,7 +33,7 @@ module Uptriever
|
|
29
33
|
|
30
34
|
def to_chunk_json
|
31
35
|
{
|
32
|
-
chunk_html: to_html,
|
36
|
+
chunk_html: +to_html,
|
33
37
|
link:,
|
34
38
|
tracking_id: id,
|
35
39
|
weight:
|
data/lib/uptriever/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: uptriever
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Vladimir Dementyev
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-08-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: redcarpet
|
@@ -122,6 +122,20 @@ dependencies:
|
|
122
122
|
- - ">="
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: front_matter_parser
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
125
139
|
- !ruby/object:Gem::Dependency
|
126
140
|
name: bundler
|
127
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -206,7 +220,7 @@ metadata:
|
|
206
220
|
documentation_uri: https://github.com/palkan/uptriever
|
207
221
|
homepage_uri: https://github.com/palkan/uptriever
|
208
222
|
source_code_uri: https://github.com/palkan/uptriever
|
209
|
-
post_install_message:
|
223
|
+
post_install_message:
|
210
224
|
rdoc_options: []
|
211
225
|
require_paths:
|
212
226
|
- lib
|
@@ -222,7 +236,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
222
236
|
version: '0'
|
223
237
|
requirements: []
|
224
238
|
rubygems_version: 3.4.19
|
225
|
-
signing_key:
|
239
|
+
signing_key:
|
226
240
|
specification_version: 4
|
227
241
|
summary: Upload documenbts to Trieve
|
228
242
|
test_files: []
|