docforge-cli 0.5.2__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/PKG-INFO +1 -1
  2. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/pyproject.toml +1 -1
  3. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/ingest.py +6 -4
  4. docforge_cli-0.6.0/src/docforge/sql/migrations/008_title_weighted_tsv.sql +42 -0
  5. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge_cli.egg-info/PKG-INFO +1 -1
  6. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge_cli.egg-info/SOURCES.txt +1 -0
  7. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/LICENSE +0 -0
  8. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/README.md +0 -0
  9. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/setup.cfg +0 -0
  10. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/__init__.py +0 -0
  11. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/__main__.py +0 -0
  12. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/api.py +0 -0
  13. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/cli.py +0 -0
  14. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/config.py +0 -0
  15. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/crawlers/__init__.py +0 -0
  16. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/crawlers/confluence.py +0 -0
  17. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/crawlers/git.py +0 -0
  18. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/db.py +0 -0
  19. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/embedder_api.py +0 -0
  20. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/lint.py +0 -0
  21. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/mcp_server.py +0 -0
  22. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/processors/__init__.py +0 -0
  23. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/processors/chunker.py +0 -0
  24. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/processors/embedder.py +0 -0
  25. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/processors/parser.py +0 -0
  26. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/query_log.py +0 -0
  27. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/ranking.py +0 -0
  28. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/remote_client.py +0 -0
  29. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/scripts/__init__.py +0 -0
  30. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/scripts/eval_search.py +0 -0
  31. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/scripts/latency_report.py +0 -0
  32. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/sources.py +0 -0
  33. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/sql/migrations/001_add_source_identifier.sql +0 -0
  34. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/sql/migrations/002_add_status_index.sql +0 -0
  35. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/sql/migrations/003_add_source_tags.sql +0 -0
  36. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/sql/migrations/004_add_query_log.sql +0 -0
  37. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/sql/migrations/005_add_query_log_user_oid.sql +0 -0
  38. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/sql/migrations/006_add_query_log_request_ms.sql +0 -0
  39. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/sql/migrations/007_add_chunks_text_tsv.sql +0 -0
  40. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/sql/schema.sql +0 -0
  41. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/templates/docforge.yml +0 -0
  42. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/templates/docker-compose.yml +0 -0
  43. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/templates/mcp_client.py +0 -0
  44. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge/templates/sources.yml +0 -0
  45. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge_cli.egg-info/dependency_links.txt +0 -0
  46. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge_cli.egg-info/entry_points.txt +0 -0
  47. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge_cli.egg-info/requires.txt +0 -0
  48. {docforge_cli-0.5.2 → docforge_cli-0.6.0}/src/docforge_cli.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docforge-cli
3
- Version: 0.5.2
3
+ Version: 0.6.0
4
4
  Summary: Forge searchable context from Confluence and git repos for AI coding assistants
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://GranatenUdo.github.io/docforge/
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "docforge-cli"
7
- version = "0.5.2"
7
+ version = "0.6.0"
8
8
  description = "Forge searchable context from Confluence and git repos for AI coding assistants"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -173,14 +173,15 @@ async def _ingest_confluence_source(
173
173
  await conn.execute(
174
174
  """
175
175
  INSERT INTO chunks (source_id, chunk_index, text,
176
- embedding, section_title)
177
- VALUES ($1, $2, $3, $4, $5)
176
+ embedding, section_title, title)
177
+ VALUES ($1, $2, $3, $4, $5, $6)
178
178
  """,
179
179
  source_id,
180
180
  chunk.chunk_index,
181
181
  chunk.text,
182
182
  np.array(embedding, dtype=np.float32),
183
183
  chunk.section_title,
184
+ source.title,
184
185
  )
185
186
 
186
187
  logger.info("Stored %d chunks for: %s", len(chunks), source.title)
@@ -263,14 +264,15 @@ async def _ingest_git_source(
263
264
  await conn.execute(
264
265
  """
265
266
  INSERT INTO chunks (source_id, chunk_index, text,
266
- embedding, section_title)
267
- VALUES ($1, $2, $3, $4, $5)
267
+ embedding, section_title, title)
268
+ VALUES ($1, $2, $3, $4, $5, $6)
268
269
  """,
269
270
  source_id,
270
271
  chunk.chunk_index,
271
272
  chunk.text,
272
273
  np.array(embedding, dtype=np.float32),
273
274
  chunk.section_title,
275
+ file.title,
274
276
  )
275
277
 
276
278
  logger.info("Stored %d chunks for: %s/%s", len(chunks), source.title, file.title)
@@ -0,0 +1,42 @@
1
+ -- Migration 008: weighted text_tsv with title and section_title.
2
+ --
3
+ -- Replaces migration 007's plain to_tsvector('english', text) with a
4
+ -- three-tier weighted variant: title='A', section_title='B', text='D'.
5
+ -- ts_rank_cd respects setweight using its default weights array
6
+ -- {A: 1.0, B: 0.4, C: 0.2, D: 0.1}, so title tokens contribute ~10x a
7
+ -- body token in cover-density ranking (ts_rank_cd is not BM25).
8
+ --
9
+ -- Before Postgres 17, GENERATED ALWAYS expressions cannot be modified in place;
10
+ -- the column is dropped and re-created. Lock window is roughly 15-90s
11
+ -- on ~20k chunks (mostly the ADD COLUMN ... STORED step recomputing
12
+ -- three to_tsvector calls per row under AccessExclusiveLock). Acceptable
13
+ -- for low-volume production; revisit if corpus grows past ~1M chunks.
14
+ --
15
+ -- Idempotency: best-effort via IF [NOT] EXISTS qualifiers. Re-running
16
+ -- causes an unnecessary drop+recreate of text_tsv but doesn't break
17
+ -- anything. The migration runs once per release in practice.
18
+
19
+ -- Step 1: add the title column (idempotent).
20
+ ALTER TABLE chunks ADD COLUMN IF NOT EXISTS title TEXT NOT NULL DEFAULT '';
21
+
22
+ -- Step 2: backfill title from sources via JOIN UPDATE.
23
+ -- Only updates rows where title is still the empty default — protects
24
+ -- against repeat runs that would otherwise rewrite the same data.
25
+ UPDATE chunks
26
+ SET title = s.title
27
+ FROM sources s
28
+ WHERE s.id = chunks.source_id AND chunks.title = '';
29
+
30
+ -- Step 3: drop the v0.5.0 text_tsv (plain to_tsvector('english', text)).
31
+ ALTER TABLE chunks DROP COLUMN IF EXISTS text_tsv;
32
+
33
+ -- Step 4: re-add text_tsv with the three-tier weighted expression.
34
+ ALTER TABLE chunks ADD COLUMN IF NOT EXISTS text_tsv tsvector
35
+ GENERATED ALWAYS AS (
36
+ setweight(to_tsvector('english', title), 'A') ||
37
+ setweight(to_tsvector('english', coalesce(section_title, '')), 'B') ||
38
+ setweight(to_tsvector('english', text), 'D')
39
+ ) STORED;
40
+
41
+ -- Step 5: re-create the GIN index (was dropped with the old column).
42
+ CREATE INDEX IF NOT EXISTS chunks_text_tsv_idx ON chunks USING GIN (text_tsv);
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docforge-cli
3
- Version: 0.5.2
3
+ Version: 0.6.0
4
4
  Summary: Forge searchable context from Confluence and git repos for AI coding assistants
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://GranatenUdo.github.io/docforge/
@@ -33,6 +33,7 @@ src/docforge/sql/migrations/004_add_query_log.sql
33
33
  src/docforge/sql/migrations/005_add_query_log_user_oid.sql
34
34
  src/docforge/sql/migrations/006_add_query_log_request_ms.sql
35
35
  src/docforge/sql/migrations/007_add_chunks_text_tsv.sql
36
+ src/docforge/sql/migrations/008_title_weighted_tsv.sql
36
37
  src/docforge/templates/docforge.yml
37
38
  src/docforge/templates/docker-compose.yml
38
39
  src/docforge/templates/mcp_client.py
File without changes
File without changes
File without changes