fact_db 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.envrc +2 -0
- data/.yardopts +5 -0
- data/CHANGELOG.md +64 -0
- data/README.md +107 -6
- data/Rakefile +243 -10
- data/db/migrate/001_enable_extensions.rb +1 -0
- data/db/migrate/002_create_sources.rb +49 -0
- data/db/migrate/003_create_entities.rb +27 -15
- data/db/migrate/004_create_entity_aliases.rb +20 -7
- data/db/migrate/005_create_facts.rb +37 -21
- data/db/migrate/006_create_entity_mentions.rb +14 -6
- data/db/migrate/007_create_fact_sources.rb +16 -8
- data/docs/api/extractors/index.md +5 -5
- data/docs/api/extractors/llm.md +17 -17
- data/docs/api/extractors/rule-based.md +14 -14
- data/docs/api/facts.md +20 -20
- data/docs/api/index.md +4 -4
- data/docs/api/models/entity.md +21 -21
- data/docs/api/models/fact.md +15 -15
- data/docs/api/models/index.md +7 -7
- data/docs/api/models/{content.md → source.md} +29 -29
- data/docs/api/pipeline/extraction.md +25 -25
- data/docs/api/pipeline/index.md +1 -1
- data/docs/api/pipeline/resolution.md +4 -4
- data/docs/api/services/entity-service.md +20 -20
- data/docs/api/services/fact-service.md +12 -12
- data/docs/api/services/index.md +5 -5
- data/docs/api/services/{content-service.md → source-service.md} +27 -27
- data/docs/architecture/database-schema.md +46 -46
- data/docs/architecture/entity-resolution.md +6 -6
- data/docs/architecture/index.md +10 -10
- data/docs/architecture/temporal-facts.md +5 -5
- data/docs/architecture/three-layer-model.md +17 -17
- data/docs/concepts.md +6 -6
- data/docs/examples/basic-usage.md +20 -20
- data/docs/examples/hr-onboarding.md +17 -17
- data/docs/examples/index.md +4 -4
- data/docs/examples/news-analysis.md +23 -23
- data/docs/getting-started/database-setup.md +28 -20
- data/docs/getting-started/index.md +3 -3
- data/docs/getting-started/quick-start.md +33 -30
- data/docs/guides/batch-processing.md +26 -26
- data/docs/guides/configuration.md +158 -77
- data/docs/guides/entity-management.md +14 -14
- data/docs/guides/extracting-facts.md +28 -28
- data/docs/guides/ingesting-content.md +14 -14
- data/docs/guides/llm-integration.md +40 -32
- data/docs/guides/temporal-queries.md +11 -11
- data/docs/index.md +6 -2
- data/examples/.envrc +4 -0
- data/examples/.gitignore +1 -0
- data/examples/001_configuration.rb +312 -0
- data/examples/{basic_usage.rb → 010_basic_usage.rb} +47 -56
- data/examples/{entity_management.rb → 020_entity_management.rb} +57 -72
- data/examples/{temporal_queries.rb → 030_temporal_queries.rb} +39 -59
- data/examples/040_output_formats.rb +177 -0
- data/examples/{rule_based_extraction.rb → 050_rule_based_extraction.rb} +39 -45
- data/examples/060_fluent_temporal_api.rb +217 -0
- data/examples/070_introspection.rb +252 -0
- data/examples/{hr_system.rb → 080_hr_system.rb} +56 -75
- data/examples/090_ingest_demo.rb +515 -0
- data/examples/100_query_context.rb +668 -0
- data/examples/110_prove_it.rb +204 -0
- data/examples/120_dump_database.rb +358 -0
- data/examples/130_rag_feedback_loop.rb +858 -0
- data/examples/README.md +229 -15
- data/examples/data/lincoln_associates.md +201 -0
- data/examples/data/lincoln_biography.md +66 -0
- data/examples/data/lincoln_cabinet.md +243 -0
- data/examples/data/lincoln_family.md +163 -0
- data/examples/data/lincoln_military.md +241 -0
- data/examples/data/lincoln_todd_family.md +136 -0
- data/examples/ingest_reporter.rb +335 -0
- data/examples/utilities.rb +182 -0
- data/lib/fact_db/config/defaults.yml +254 -0
- data/lib/fact_db/config.rb +94 -35
- data/lib/fact_db/database.rb +98 -8
- data/lib/fact_db/extractors/base.rb +106 -21
- data/lib/fact_db/extractors/llm_extractor.rb +35 -63
- data/lib/fact_db/extractors/manual_extractor.rb +46 -6
- data/lib/fact_db/extractors/rule_based_extractor.rb +136 -25
- data/lib/fact_db/llm/adapter.rb +3 -3
- data/lib/fact_db/models/entity.rb +94 -22
- data/lib/fact_db/models/entity_alias.rb +41 -7
- data/lib/fact_db/models/entity_mention.rb +34 -1
- data/lib/fact_db/models/fact.rb +259 -28
- data/lib/fact_db/models/fact_source.rb +43 -9
- data/lib/fact_db/models/source.rb +113 -0
- data/lib/fact_db/pipeline/extraction_pipeline.rb +35 -35
- data/lib/fact_db/pipeline/resolution_pipeline.rb +5 -5
- data/lib/fact_db/query_result.rb +202 -0
- data/lib/fact_db/resolution/entity_resolver.rb +139 -39
- data/lib/fact_db/resolution/fact_resolver.rb +86 -14
- data/lib/fact_db/services/entity_service.rb +246 -37
- data/lib/fact_db/services/fact_service.rb +254 -17
- data/lib/fact_db/services/source_service.rb +164 -0
- data/lib/fact_db/temporal/query.rb +71 -7
- data/lib/fact_db/temporal/query_builder.rb +69 -0
- data/lib/fact_db/temporal/timeline.rb +102 -11
- data/lib/fact_db/transformers/base.rb +77 -0
- data/lib/fact_db/transformers/cypher_transformer.rb +185 -0
- data/lib/fact_db/transformers/json_transformer.rb +17 -0
- data/lib/fact_db/transformers/raw_transformer.rb +35 -0
- data/lib/fact_db/transformers/text_transformer.rb +114 -0
- data/lib/fact_db/transformers/triple_transformer.rb +138 -0
- data/lib/fact_db/validation/alias_filter.rb +185 -0
- data/lib/fact_db/version.rb +1 -1
- data/lib/fact_db.rb +281 -30
- data/mkdocs.yml +2 -2
- metadata +60 -16
- data/db/migrate/002_create_contents.rb +0 -44
- data/lib/fact_db/models/content.rb +0 -62
- data/lib/fact_db/services/content_service.rb +0 -93
|
@@ -1,93 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "digest"

module FactDb
  module Services
    # Service object for creating and querying +Models::Content+ records.
    #
    # New content is de-duplicated by the SHA-256 digest of its raw text, and
    # an embedding is attached when the configuration provides a generator.
    #
    # NOTE(review): the query scopes used below (+search_text+, +by_type+,
    # +captured_after+, +captured_before+, +captured_between+,
    # +nearest_neighbors+) are defined on the model, outside this file;
    # their exact semantics should be confirmed there.
    class ContentService
      # @return [Object] configuration; this class reads
      #   +embedding_generator+ and +logger+ from it
      attr_reader :config

      # @param config [Object] configuration object (defaults to +FactDb.config+)
      def initialize(config = FactDb.config)
        @config = config
      end

      # Stores a piece of content, or returns the already-stored record when
      # one with the same SHA-256 digest of +raw_text+ exists.
      #
      # @param raw_text [String] text to store; hashed with SHA-256
      # @param type [#to_s] stored as +content_type+ after +to_s+
      # @param captured_at [Time] stored as +captured_at+
      # @param metadata [Hash] stored as +source_metadata+
      # @param title [String, nil] stored as +title+
      # @param source_uri [String, nil] stored as +source_uri+
      # @return [Models::Content] the existing or newly created record
      def create(raw_text, type:, captured_at: Time.current, metadata: {}, title: nil, source_uri: nil)
        content_hash = Digest::SHA256.hexdigest(raw_text)

        # Identical text is never stored twice; the first record wins and the
        # other arguments of this call are ignored in that case.
        existing = Models::Content.find_by(content_hash: content_hash)
        return existing if existing

        Models::Content.create!(
          raw_text: raw_text,
          content_hash: content_hash,
          content_type: type.to_s,
          title: title,
          source_uri: source_uri,
          source_metadata: metadata,
          captured_at: captured_at,
          embedding: generate_embedding(raw_text)
        )
      end

      # @param id [Object] identifier passed straight to +Models::Content.find+
      # @return [Models::Content]
      def find(id)
        Models::Content.find(id)
      end

      # @param hash [String] value matched against the +content_hash+ column
      # @return [Models::Content, nil] whatever +find_by+ returns
      def find_by_hash(hash)
        Models::Content.find_by(content_hash: hash)
      end

      # Text search with optional type and capture-time filters, ordered by
      # +captured_at+ descending.
      #
      # @param query [String] passed to the model's +search_text+ scope
      # @param type [Object, nil] when given, applied via the +by_type+ scope
      # @param from [Object, nil] when given, applied via +captured_after+
      # @param to [Object, nil] when given, applied via +captured_before+
      # @param limit [Integer] maximum number of records
      # @return [Object] the resulting query scope
      def search(query, type: nil, from: nil, to: nil, limit: 20)
        scope = Models::Content.search_text(query)
        scope = scope.by_type(type) if type
        scope = scope.captured_after(from) if from
        scope = scope.captured_before(to) if to
        scope.order(captured_at: :desc).limit(limit)
      end

      # Embedding-based search. Returns +Models::Content.none+ when no
      # embedding could be generated for +query+.
      #
      # @param query [String] text to embed
      # @param limit [Integer] forwarded to +nearest_neighbors+
      # @return [Object] the resulting query scope
      def semantic_search(query, limit: 20)
        embedding = generate_embedding(query)
        return Models::Content.none unless embedding

        Models::Content.nearest_neighbors(embedding, limit: limit)
      end

      # Records of one type, ordered by +captured_at+ descending.
      #
      # @param type [Object] forwarded to the model's +by_type+ scope
      # @param limit [Integer, nil] optional cap; no limit is applied when nil
      # @return [Object] the resulting query scope
      def by_type(type, limit: nil)
        scope = Models::Content.by_type(type).order(captured_at: :desc)
        scope = scope.limit(limit) if limit
        scope
      end

      # Records selected by the model's +captured_between+ scope, ordered by
      # +captured_at+ ascending.
      #
      # @param from [Object] lower bound forwarded to +captured_between+
      # @param to [Object] upper bound forwarded to +captured_between+
      # @return [Object] the resulting query scope
      def between(from, to)
        Models::Content.captured_between(from, to).order(captured_at: :asc)
      end

      # @param limit [Integer] maximum number of records
      # @return [Object] scope ordered by +captured_at+ descending
      def recent(limit: 10)
        Models::Content.order(captured_at: :desc).limit(limit)
      end

      # Aggregate figures about stored content.
      #
      # @return [Hash] with keys +:total+, +:total_count+ (same value as
      #   +:total+), +:by_type+, +:earliest+, +:latest+ and +:total_words+
      def stats
        # Count once: both keys report the same figure, so a second query is
        # wasted work and could disagree with the first under concurrent writes.
        total = Models::Content.count

        {
          total: total,
          total_count: total,
          by_type: Models::Content.group(:content_type).count,
          earliest: Models::Content.minimum(:captured_at),
          latest: Models::Content.maximum(:captured_at),
          total_words: Models::Content.sum("array_length(regexp_split_to_array(raw_text, '\\s+'), 1)")
        }
      end

      private

      # Best-effort embedding generation: returns nil when no generator is
      # configured, and logs a warning and returns nil when the generator
      # raises a StandardError.
      #
      # @param text [String] passed to +config.embedding_generator.call+
      # @return [Object, nil] the generator's result, or nil
      def generate_embedding(text)
        return nil unless config.embedding_generator

        config.embedding_generator.call(text)
      rescue StandardError => e
        config.logger&.warn("Failed to generate embedding: #{e.message}")
        nil
      end
    end
  end
end
|