uptriever 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f44598b07e4b7c130e49c2308587dcc675eda01710f9ad24ce2fb4284fcfea63
4
- data.tar.gz: 98ba686f341d7cdb66c06db19b2c0fba4c871a9c8b92c760177a6fd8f51d673e
3
+ metadata.gz: af61ca4ef13ebf666b7637be03e243fa891ba69052bb34bdd2424b612c6a5a42
4
+ data.tar.gz: f418f2b390df6e8bde1dfc34eafd2881174709272644c7b7a26f83f09d598e3f
5
5
  SHA512:
6
- metadata.gz: 7627037babef9668fd456b79bd09e34686e543871e51eff72e8126ebf08113f3cc79517fa2a62846f192651e34d2e36bebb30a8d8d574aeeb9eef67685fa1f3a
7
- data.tar.gz: a74448f80ab208acbf768c31464f730ea9402e73a68630fc4f71f372b93f26b573890d329640213dedc2afe977b9b9e5acd78fc59929e9285614ee16201677dc
6
+ metadata.gz: b07feade83badf77f8177e3f222780348427f33d3dd37f00d6935b32b97d0c253711fdb3c2c1a4112bef535b24a8fb925c500e0d4e684b884484a4f1a8c1f74c
7
+ data.tar.gz: ee8faddee9277a2c407b8b2f681b63fcc58a9a9f004a9642e6e1b890e3cd3dfc69a7f3c04498359e6b23b9f51fb4588818db2e28904c816b244d6368a1ea9362
data/CHANGELOG.md CHANGED
@@ -2,6 +2,16 @@
2
2
 
3
3
  ## master
4
4
 
5
+ ## 0.2.0 (2025-08-05)
6
+
7
+ - Ignore frontmatter. ([@palkan][])
8
+
9
+ - Support `.mdx` ([@palkan][])
10
+
11
+ ## 0.1.2 (2025-01-03)
12
+
13
+ - Fix the bug with links including PWD. ([@palkan][])
14
+
5
15
  ## 0.1.1 (2024-07-25)
6
16
 
7
17
  - Add ERB support to allow including dynamic fragments in configs. ([@palkan][])
@@ -41,6 +41,6 @@ module Uptriever
41
41
 
42
42
  private
43
43
 
44
- def chunk_dup = chunk.dup.tap { _1[:chunk_html] = +"" }
44
+ def chunk_dup = chunk.dup
45
45
  end
46
46
  end
@@ -8,9 +8,10 @@ module Uptriever
8
8
  BASE_URL = "https://api.trieve.ai/api"
9
9
 
10
10
  attr_reader :headers
11
- private attr_reader :dry_run
11
+ private attr_reader :dry_run, :dataset_id
12
12
 
13
- def initialize(api_key, dataset, dry_run: false)
13
+ def initialize(api_key = ENV["TRIEVE_API_KEY"], dataset = ENV["TRIEVE_DATASET"], dry_run: false)
14
+ @dataset_id = dataset
14
15
  @dry_run = dry_run
15
16
  @headers = {
16
17
  "Authorization" => api_key,
@@ -25,22 +26,53 @@ module Uptriever
25
26
 
26
27
  def push_chunk(chunk, upsert: true)
27
28
  chunk[:upsert_by_tracking_id] = upsert
28
- perform_request("/chunk", chunk.to_json)
29
+ perform_request("/chunk", chunk.to_json).inspect
30
+ end
31
+
32
+ def scroll_chunks(per_page: 100)
33
+ data = {
34
+ filters: {must: nil},
35
+ page_size: per_page
36
+ }
37
+
38
+ offset_id = nil
39
+
40
+ loop do
41
+ data[:offset_chunk_id] = offset_id if offset_id
42
+ data = perform_request("/chunks/scroll", data.to_json)
43
+
44
+ chunks = data.fetch("chunks")
45
+ chunks = chunks.select { _1["id"] != offset_id } if offset_id
46
+
47
+ break if chunks.empty?
48
+
49
+ chunks.each { yield _1 }
50
+
51
+ offset_id = chunks.last["id"]
52
+ end
53
+ end
54
+
55
+ def delete_chunk(id)
56
+ perform_request("/chunk/#{id}", method: :delete, expected_code: 204)
57
+ end
58
+
59
+ def usage
60
+ perform_request("/dataset/usage/#{dataset_id}", method: :get)
29
61
  end
30
62
 
31
63
  private
32
64
 
33
- def perform_request(path, data)
65
+ def perform_request(path, data = nil, method: :post, expected_code: 200)
34
66
  uri = URI.parse(BASE_URL + path)
35
67
 
36
68
  http = Net::HTTP.new(uri.host, uri.port)
37
69
  http.use_ssl = true if uri.scheme == "https"
38
70
 
39
- request = Net::HTTP::Post.new(
71
+ request = Net::HTTP.const_get(method.to_s.capitalize).new(
40
72
  uri.request_uri,
41
73
  headers.merge("Content-Type" => "application/json")
42
74
  )
43
- request.body = data
75
+ request.body = data if data
44
76
 
45
77
  if dry_run
46
78
  puts "[DRY RUN] Perform POST #{path}: #{data}"
@@ -49,11 +81,11 @@ module Uptriever
49
81
 
50
82
  response = http.request(request)
51
83
 
52
- if response.code.to_i != 200
84
+ if response.code.to_i != expected_code
53
85
  raise "Invalid response code: #{response.code} (#{response.body[100...]})"
54
86
  end
55
87
 
56
- JSON.parse(response.body)
88
+ JSON.parse(response.body) if response.body
57
89
  end
58
90
  end
59
91
  end
@@ -10,7 +10,7 @@ module Uptriever
10
10
  attr_reader :config_path, :root_dir
11
11
 
12
12
  def initialize(root_dir)
13
- @root_dir = root_dir
13
+ @root_dir = File.expand_path(root_dir)
14
14
  @config_path = File.join(root_dir, ".trieve.yml")
15
15
  raise ArgumentError, ".trieve.yml is missing in the #{root_dir}" unless File.file?(config_path)
16
16
  end
@@ -32,7 +32,7 @@ module Uptriever
32
32
  File.join(config["url_prefix"], _1)
33
33
  end
34
34
 
35
- link = page["link"] || File.join(config.fetch("hostname"), relative_link)
35
+ link = (page["link"] || File.join(config.fetch("hostname"), relative_link)).gsub(/([^:])\/{2,}/, '\1/')
36
36
  id = page["id"] || relative_link.sub(/^\//, "").gsub(/[\/-]/, "-")
37
37
 
38
38
  Document.new(id, page["source"], link, **defaults.merge({groups: page["groups"], tags: page["tags"], weight: page["weight"]}.compact))
@@ -1,6 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "redcarpet"
4
+ require "front_matter_parser"
5
+ require "front_matter_parser/syntax_parser"
6
+ FrontMatterParser::SyntaxParser::Mdx = FrontMatterParser::SyntaxParser::Md
4
7
 
5
8
  module Uptriever
6
9
  class Document
@@ -17,9 +20,10 @@ module Uptriever
17
20
 
18
21
  def to_html
19
22
  case File.extname(path)
20
- when ".md"
23
+ when ".md", ".mdx"
24
+ parsed = FrontMatterParser::Parser.parse_file(path)
21
25
  markdown = Redcarpet::Markdown.new(Redcarpet::Render::HTML, autolink: true, tables: true)
22
- markdown.render(File.read(path))
26
+ markdown.render(parsed.content)
23
27
  when ".html"
24
28
  File.read(path)
25
29
  else
@@ -29,7 +33,7 @@ module Uptriever
29
33
 
30
34
  def to_chunk_json
31
35
  {
32
- chunk_html: to_html,
36
+ chunk_html: +to_html,
33
37
  link:,
34
38
  tracking_id: id,
35
39
  weight:
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Uptriever # :nodoc:
4
- VERSION = "0.1.1"
4
+ VERSION = "0.2.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uptriever
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Vladimir Dementyev
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-25 00:00:00.000000000 Z
11
+ date: 2025-08-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: redcarpet
@@ -122,6 +122,20 @@ dependencies:
122
122
  - - ">="
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: front_matter_parser
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
125
139
  - !ruby/object:Gem::Dependency
126
140
  name: bundler
127
141
  requirement: !ruby/object:Gem::Requirement
@@ -206,7 +220,7 @@ metadata:
206
220
  documentation_uri: https://github.com/palkan/uptriever
207
221
  homepage_uri: https://github.com/palkan/uptriever
208
222
  source_code_uri: https://github.com/palkan/uptriever
209
- post_install_message:
223
+ post_install_message:
210
224
  rdoc_options: []
211
225
  require_paths:
212
226
  - lib
@@ -222,7 +236,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
222
236
  version: '0'
223
237
  requirements: []
224
238
  rubygems_version: 3.4.19
225
- signing_key:
239
+ signing_key:
226
240
  specification_version: 4
227
241
  summary: Upload documents to Trieve
228
242
  test_files: []