htm 0.0.1 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.aigcm_msg +1 -0
- data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
- data/.claude/settings.local.json +92 -0
- data/.envrc +1 -0
- data/.irbrc +283 -80
- data/.tbls.yml +31 -0
- data/CHANGELOG.md +314 -16
- data/CLAUDE.md +603 -0
- data/README.md +76 -5
- data/Rakefile +5 -0
- data/SETUP.md +132 -101
- data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
- data/db/migrate/00002_create_robots.rb +11 -0
- data/db/migrate/00003_create_file_sources.rb +20 -0
- data/db/migrate/00004_create_nodes.rb +65 -0
- data/db/migrate/00005_create_tags.rb +13 -0
- data/db/migrate/00006_create_node_tags.rb +18 -0
- data/db/migrate/00007_create_robot_nodes.rb +26 -0
- data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
- data/db/schema.sql +390 -36
- data/docs/api/database.md +19 -232
- data/docs/api/embedding-service.md +1 -7
- data/docs/api/htm.md +305 -364
- data/docs/api/index.md +1 -7
- data/docs/api/long-term-memory.md +342 -590
- data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
- data/docs/api/yard/HTM/AuthorizationError.md +11 -0
- data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
- data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
- data/docs/api/yard/HTM/Configuration.md +175 -0
- data/docs/api/yard/HTM/Database.md +99 -0
- data/docs/api/yard/HTM/DatabaseError.md +14 -0
- data/docs/api/yard/HTM/EmbeddingError.md +18 -0
- data/docs/api/yard/HTM/EmbeddingService.md +58 -0
- data/docs/api/yard/HTM/Error.md +11 -0
- data/docs/api/yard/HTM/JobAdapter.md +39 -0
- data/docs/api/yard/HTM/LongTermMemory.md +342 -0
- data/docs/api/yard/HTM/NotFoundError.md +17 -0
- data/docs/api/yard/HTM/Observability.md +107 -0
- data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
- data/docs/api/yard/HTM/Railtie.md +27 -0
- data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
- data/docs/api/yard/HTM/TagError.md +18 -0
- data/docs/api/yard/HTM/TagService.md +67 -0
- data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
- data/docs/api/yard/HTM/Timeframe.md +40 -0
- data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
- data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
- data/docs/api/yard/HTM/ValidationError.md +20 -0
- data/docs/api/yard/HTM/WorkingMemory.md +131 -0
- data/docs/api/yard/HTM.md +80 -0
- data/docs/api/yard/index.csv +179 -0
- data/docs/api/yard-reference.md +51 -0
- data/docs/architecture/adrs/001-postgresql-timescaledb.md +1 -1
- data/docs/architecture/adrs/003-ollama-embeddings.md +1 -1
- data/docs/architecture/adrs/010-redis-working-memory-rejected.md +2 -27
- data/docs/architecture/adrs/index.md +2 -13
- data/docs/architecture/hive-mind.md +165 -166
- data/docs/architecture/index.md +2 -2
- data/docs/architecture/overview.md +5 -171
- data/docs/architecture/two-tier-memory.md +1 -35
- data/docs/assets/images/adr-010-current-architecture.svg +37 -0
- data/docs/assets/images/adr-010-proposed-architecture.svg +48 -0
- data/docs/assets/images/adr-dependency-tree.svg +93 -0
- data/docs/assets/images/class-hierarchy.svg +55 -0
- data/docs/assets/images/exception-hierarchy.svg +45 -0
- data/docs/assets/images/htm-architecture-overview.svg +83 -0
- data/docs/assets/images/htm-complete-memory-flow.svg +160 -0
- data/docs/assets/images/htm-context-assembly-flow.svg +148 -0
- data/docs/assets/images/htm-eviction-process.svg +141 -0
- data/docs/assets/images/htm-memory-addition-flow.svg +138 -0
- data/docs/assets/images/htm-memory-recall-flow.svg +152 -0
- data/docs/assets/images/htm-node-states.svg +123 -0
- data/docs/assets/images/project-structure.svg +78 -0
- data/docs/assets/images/test-directory-structure.svg +38 -0
- data/{dbdoc → docs/database}/README.md +127 -125
- data/docs/database/public.file_sources.md +42 -0
- data/docs/database/public.file_sources.svg +211 -0
- data/{dbdoc → docs/database}/public.node_tags.md +7 -8
- data/docs/database/public.node_tags.svg +239 -0
- data/{dbdoc → docs/database}/public.nodes.md +22 -17
- data/docs/database/public.nodes.svg +271 -0
- data/docs/database/public.robot_nodes.md +46 -0
- data/docs/database/public.robot_nodes.svg +243 -0
- data/{dbdoc → docs/database}/public.robots.md +2 -3
- data/docs/database/public.robots.svg +161 -0
- data/docs/database/public.tags.svg +139 -0
- data/{dbdoc → docs/database}/schema.json +941 -630
- data/docs/database/schema.svg +282 -0
- data/docs/development/index.md +1 -29
- data/docs/development/schema.md +134 -309
- data/docs/development/testing.md +1 -9
- data/docs/getting-started/index.md +47 -0
- data/docs/{installation.md → getting-started/installation.md} +2 -2
- data/docs/{quick-start.md → getting-started/quick-start.md} +5 -5
- data/docs/guides/adding-memories.md +295 -643
- data/docs/guides/recalling-memories.md +36 -1
- data/docs/guides/search-strategies.md +85 -51
- data/docs/images/htm-er-diagram.svg +156 -0
- data/docs/index.md +16 -31
- data/docs/multi_framework_support.md +4 -4
- data/examples/README.md +280 -0
- data/examples/basic_usage.rb +18 -16
- data/examples/cli_app/htm_cli.rb +146 -8
- data/examples/cli_app/temp.log +93 -0
- data/examples/custom_llm_configuration.rb +1 -2
- data/examples/example_app/app.rb +11 -14
- data/examples/file_loader_usage.rb +177 -0
- data/examples/robot_groups/lib/robot_group.rb +419 -0
- data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
- data/examples/robot_groups/multi_process.rb +286 -0
- data/examples/robot_groups/robot_worker.rb +136 -0
- data/examples/robot_groups/same_process.rb +229 -0
- data/examples/sinatra_app/Gemfile +1 -0
- data/examples/sinatra_app/Gemfile.lock +166 -0
- data/examples/sinatra_app/app.rb +219 -24
- data/examples/timeframe_demo.rb +276 -0
- data/lib/htm/active_record_config.rb +10 -3
- data/lib/htm/circuit_breaker.rb +202 -0
- data/lib/htm/configuration.rb +313 -80
- data/lib/htm/database.rb +67 -36
- data/lib/htm/embedding_service.rb +39 -2
- data/lib/htm/errors.rb +131 -11
- data/lib/htm/{sinatra.rb → integrations/sinatra.rb} +87 -12
- data/lib/htm/job_adapter.rb +10 -3
- data/lib/htm/jobs/generate_embedding_job.rb +5 -4
- data/lib/htm/jobs/generate_tags_job.rb +4 -0
- data/lib/htm/loaders/markdown_loader.rb +263 -0
- data/lib/htm/loaders/paragraph_chunker.rb +112 -0
- data/lib/htm/long_term_memory.rb +601 -321
- data/lib/htm/models/file_source.rb +99 -0
- data/lib/htm/models/node.rb +116 -12
- data/lib/htm/models/robot.rb +53 -4
- data/lib/htm/models/robot_node.rb +51 -0
- data/lib/htm/models/tag.rb +302 -0
- data/lib/htm/observability.rb +395 -0
- data/lib/htm/tag_service.rb +60 -3
- data/lib/htm/tasks.rb +29 -0
- data/lib/htm/timeframe.rb +194 -0
- data/lib/htm/timeframe_extractor.rb +307 -0
- data/lib/htm/version.rb +1 -1
- data/lib/htm/working_memory.rb +165 -70
- data/lib/htm.rb +352 -133
- data/lib/tasks/doc.rake +300 -0
- data/lib/tasks/files.rake +299 -0
- data/lib/tasks/htm.rake +188 -2
- data/lib/tasks/jobs.rake +10 -12
- data/lib/tasks/tags.rake +194 -0
- data/mkdocs.yml +91 -9
- data/notes/ARCHITECTURE_REVIEW.md +1167 -0
- data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
- data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
- data/notes/next_steps.md +100 -0
- data/notes/plan.md +627 -0
- data/notes/tag_ontology_enhancement_ideas.md +222 -0
- data/notes/timescaledb_removal_summary.md +200 -0
- metadata +177 -37
- data/db/migrate/20250101000002_create_robots.rb +0 -14
- data/db/migrate/20250101000003_create_nodes.rb +0 -42
- data/db/migrate/20250101000005_create_tags.rb +0 -38
- data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
- data/dbdoc/public.node_tags.svg +0 -112
- data/dbdoc/public.nodes.svg +0 -118
- data/dbdoc/public.robots.svg +0 -90
- data/dbdoc/public.tags.svg +0 -60
- data/dbdoc/schema.svg +0 -154
- data/{dbdoc → docs/database}/public.node_stats.md +0 -0
- data/{dbdoc → docs/database}/public.node_stats.svg +0 -0
- data/{dbdoc → docs/database}/public.nodes_tags.md +0 -0
- data/{dbdoc → docs/database}/public.nodes_tags.svg +0 -0
- data/{dbdoc → docs/database}/public.ontology_structure.md +0 -0
- data/{dbdoc → docs/database}/public.ontology_structure.svg +0 -0
- data/{dbdoc → docs/database}/public.operations_log.md +0 -0
- data/{dbdoc → docs/database}/public.operations_log.svg +0 -0
- data/{dbdoc → docs/database}/public.relationships.md +0 -0
- data/{dbdoc → docs/database}/public.relationships.svg +0 -0
- data/{dbdoc → docs/database}/public.robot_activity.md +0 -0
- data/{dbdoc → docs/database}/public.robot_activity.svg +0 -0
- data/{dbdoc → docs/database}/public.schema_migrations.md +0 -0
- data/{dbdoc → docs/database}/public.schema_migrations.svg +0 -0
- data/{dbdoc → docs/database}/public.tags.md +3 -3
- /data/{dbdoc → docs/database}/public.topic_relationships.md +0 -0
- /data/{dbdoc → docs/database}/public.topic_relationships.svg +0 -0
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'date'
|
|
4
|
+
require 'time'
|
|
5
|
+
|
|
6
|
+
class HTM
|
|
7
|
+
# Timeframe - Normalizes various timeframe inputs for database queries
|
|
8
|
+
#
|
|
9
|
+
# Handles multiple input types and normalizes them to either:
|
|
10
|
+
# - nil (no timeframe filter)
|
|
11
|
+
# - Range (single time window)
|
|
12
|
+
# - Array<Range> (multiple time windows, OR'd together)
|
|
13
|
+
#
|
|
14
|
+
# @example Various input types
|
|
15
|
+
# Timeframe.normalize(nil) # => nil (no filter)
|
|
16
|
+
# Timeframe.normalize(Date.today) # => Range for entire day
|
|
17
|
+
# Timeframe.normalize(Time.now) # => Range for entire day
|
|
18
|
+
# Timeframe.normalize("last week") # => Range from chronic/extractor
|
|
19
|
+
# Timeframe.normalize(:auto, query: "...") # => Extract from query text
|
|
20
|
+
# Timeframe.normalize(time1..time2) # => Pass through (after validation)
|
|
21
|
+
# Timeframe.normalize([range1, range2]) # => Array of ranges
|
|
22
|
+
#
|
|
23
|
+
class Timeframe
|
|
24
|
+
# Result structure for :auto mode
|
|
25
|
+
Result = Struct.new(:timeframe, :query, :extracted, keyword_init: true)
|
|
26
|
+
|
|
27
|
+
class << self
|
|
28
|
+
# Normalize a timeframe input to nil, Range, or Array<Range>
#
# Dispatches on the class of +input+ and delegates to the matching
# private normalizer.
#
# @param input [nil, Range, Array, Date, DateTime, Time, String, Symbol] Timeframe specification
# @param query [String, nil] Query text (required when input is :auto)
# @return [nil, Range, Array<Range>] Normalized timeframe
# @return [Result] When input is :auto, returns Result with :timeframe, :query, :extracted
# @raise [ArgumentError] If input is of an unsupported type
#
def normalize(input, query: nil)
  case input
  when nil
    nil

  when :auto
    normalize_auto(query)

  when Range
    validate_range!(input)
    input

  when Array
    normalize_array(input)

  # DateTime is a subclass of Date, so it has to be tested first;
  # with Date listed first this branch could never be reached.
  when DateTime
    normalize_datetime(input)

  when Date
    normalize_date(input)

  when Time
    normalize_time(input)

  when String
    normalize_string(input)

  else
    raise ArgumentError, "Unsupported timeframe type: #{input.class}. " \
                         "Expected nil, Range, Array<Range>, Date, DateTime, Time, String, or :auto"
  end
end
|
|
67
|
+
|
|
68
|
+
# Check if a value is a valid timeframe input
#
# Mirrors the acceptance rules of the normalizer so that a +true+ answer
# means normalization will not raise for type reasons:
# - nil, :auto, Date (including DateTime), Time and String are accepted
# - a Range is accepted only when both ends respond to +to_time+
# - an Array must be non-empty and contain only Range (with time-like
#   ends), Date, DateTime, Time or String elements
#
# @param input [Object] Value to check
# @return [Boolean]
#
def valid?(input)
  time_like_range = lambda do |range|
    range.begin.respond_to?(:to_time) && range.end.respond_to?(:to_time)
  end

  acceptable_element = lambda do |item|
    case item
    when Range then time_like_range.call(item)
    when Date, Time, String then true # DateTime is covered by Date
    else false
    end
  end

  case input
  when nil, :auto, Date, Time, String
    true
  when Range
    time_like_range.call(input)
  when Array
    # An empty array is rejected by normalization, so it is not valid here either.
    !input.empty? && input.all?(&acceptable_element)
  else
    false
  end
end
|
|
83
|
+
|
|
84
|
+
private
|
|
85
|
+
|
|
86
|
+
# Handle the :auto timeframe by pulling a temporal expression out of the query
#
# Delegates to HTM::TimeframeExtractor.extract and repackages its result.
#
# @param query [String] Query text to parse
# @return [Result] Result with :timeframe, :query (as returned by the extractor),
#   and :extracted (the extractor's original_expression)
# @raise [ArgumentError] If query is nil or blank
#
def normalize_auto(query)
  if query.nil? || query.strip.empty?
    raise ArgumentError, "query is required when timeframe is :auto"
  end

  extraction = HTM::TimeframeExtractor.extract(query)

  Result.new(
    timeframe: extraction.timeframe,
    query:     extraction.query,
    extracted: extraction.original_expression
  )
end
|
|
102
|
+
|
|
103
|
+
# Normalize an array of timeframe specifications
#
# Each element is normalized independently: Range elements are validated
# and kept, Date/DateTime/Time/String elements are passed to their
# respective normalizers.
#
# @param array [Array<Range, Date, DateTime, Time, String>] Elements to normalize
# @return [Array] One normalized entry per input element, in order
# @raise [ArgumentError] If the array is empty or contains an unsupported element
#
def normalize_array(array)
  raise ArgumentError, "Array timeframe cannot be empty" if array.empty?

  array.map do |item|
    case item
    when Range
      validate_range!(item)
      item
    # DateTime inherits from Date, so it must be matched before Date or
    # this branch is never taken.
    when DateTime
      normalize_datetime(item)
    when Date
      normalize_date(item)
    when Time
      normalize_time(item)
    when String
      normalize_string(item)
    else
      raise ArgumentError, "Array elements must be Range, Date, DateTime, Time, or String. Got: #{item.class}"
    end
  end
end
|
|
129
|
+
|
|
130
|
+
# Normalize a Date to a Range spanning the entire day
#
# Both bounds are built with Time.new without an explicit offset, i.e. in
# the process's local timezone.
#
# @param date [Date] Date to normalize (anything responding to year/month/day)
# @return [Range] Time range from 00:00:00 to 23:59:59.999999 of that day
#
def normalize_date(date)
  day_start = Time.new(date.year, date.month, date.day, 0, 0, 0)

  # A bare 23:59:59 upper bound would exclude timestamps carrying fractional
  # seconds in the last second of the day; extend it to the final microsecond.
  day_end = Time.new(date.year, date.month, date.day, 23, 59, Rational(59_999_999, 1_000_000))

  day_start..day_end
end
|
|
141
|
+
|
|
142
|
+
# Normalize a DateTime to a Range spanning the entire day
#
# Only the calendar date of the DateTime is used; the day range itself is
# produced by normalize_date.
#
# @param datetime [DateTime] DateTime to normalize
# @return [Range] Time range for the day containing this moment
#
def normalize_datetime(datetime)
  calendar_day = datetime.to_date
  normalize_date(calendar_day)
end
|
|
151
|
+
|
|
152
|
+
# Normalize a Time to a Range spanning the entire day
#
# The day is taken from the time's own year/month/day, and both bounds are
# built with the same UTC offset as the given time.
#
# @param time [Time] Time to normalize
# @return [Range] Time range from 00:00:00 to 23:59:59.999999 of the day
#   containing this moment
#
def normalize_time(time)
  offset = time.utc_offset
  day_start = Time.new(time.year, time.month, time.day, 0, 0, 0, offset)

  # Extend the upper bound to the last microsecond so sub-second timestamps
  # in the final second of the day still fall inside the range.
  day_end = Time.new(time.year, time.month, time.day, 23, 59, Rational(59_999_999, 1_000_000), offset)

  day_start..day_end
end
|
|
162
|
+
|
|
163
|
+
# Normalize a String using TimeframeExtractor
#
# Tries the string as-is first; if the extractor finds no timeframe, retries
# once with the string prefixed by "show me ".
#
# @param string [String] Natural language timeframe
# @return [Range, nil] Parsed timeframe, or nil if blank or unparseable
#
def normalize_string(string)
  return nil if string.nil? || string.strip.empty?

  direct = HTM::TimeframeExtractor.extract(string).timeframe
  return direct if direct

  # Second attempt: embed the expression in a short sentence
  HTM::TimeframeExtractor.extract("show me #{string}").timeframe
end
|
|
181
|
+
|
|
182
|
+
# Ensure both ends of a Range can be converted to Time
#
# An endpoint counts as time-compatible when it responds to +to_time+.
#
# @param range [Range] Range to validate
# @return [nil] When the range is acceptable
# @raise [ArgumentError] If either endpoint does not respond to +to_time+
#
def validate_range!(range)
  endpoints = [range.begin, range.end]
  return if endpoints.all? { |endpoint| endpoint.respond_to?(:to_time) }

  raise ArgumentError, "Range must have Time-compatible begin and end values"
end
|
|
192
|
+
end
|
|
193
|
+
end
|
|
194
|
+
end
|
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'chronic'
|
|
4
|
+
|
|
5
|
+
class HTM
|
|
6
|
+
# Timeframe Extractor - Extracts temporal expressions from queries
|
|
7
|
+
#
|
|
8
|
+
# This service parses natural language time expressions from recall queries
|
|
9
|
+
# and returns both the timeframe and the cleaned query text.
|
|
10
|
+
#
|
|
11
|
+
# Supports:
|
|
12
|
+
# - Standard time expressions via Chronic gem ("yesterday", "last week", etc.)
|
|
13
|
+
# - "few" keyword mapped to FEW constant (e.g., "few days ago" → "3 days ago")
|
|
14
|
+
# - "recent/recently" without units defaults to FEW days
|
|
15
|
+
#
|
|
16
|
+
# @example Basic usage
|
|
17
|
+
# result = TimeframeExtractor.extract("what did we discuss last week about PostgreSQL")
|
|
18
|
+
# result[:query] # => "what did we discuss about PostgreSQL"
|
|
19
|
+
# result[:timeframe] # => #<Range: 2025-11-21..2025-11-28>
|
|
20
|
+
#
|
|
21
|
+
# @example With "few" keyword
|
|
22
|
+
# result = TimeframeExtractor.extract("show me notes from a few days ago")
|
|
23
|
+
# result[:timeframe] # => Time object for 3 days ago
|
|
24
|
+
#
|
|
25
|
+
# @example With "recently"
|
|
26
|
+
# result = TimeframeExtractor.extract("what did we recently discuss")
|
|
27
|
+
# result[:timeframe] # => Range from 3 days ago to now
|
|
28
|
+
#
|
|
29
|
+
class TimeframeExtractor
|
|
30
|
+
# The numeric value for "few" and "recently" without units
|
|
31
|
+
FEW = 3
|
|
32
|
+
|
|
33
|
+
# Default unit for "recently" when no time unit is specified
|
|
34
|
+
DEFAULT_RECENT_UNIT = :days
|
|
35
|
+
|
|
36
|
+
# Time unit patterns for matching
|
|
37
|
+
TIME_UNITS = %w[
|
|
38
|
+
seconds? minutes? hours? days? weeks? months? years?
|
|
39
|
+
].join('|').freeze
|
|
40
|
+
|
|
41
|
+
# Word-to-number mapping for written numbers
|
|
42
|
+
WORD_NUMBERS = {
|
|
43
|
+
'one' => 1, 'two' => 2, 'three' => 3, 'four' => 4, 'five' => 5,
|
|
44
|
+
'six' => 6, 'seven' => 7, 'eight' => 8, 'nine' => 9, 'ten' => 10
|
|
45
|
+
}.freeze
|
|
46
|
+
|
|
47
|
+
# Patterns for temporal expressions (order matters - more specific first)
|
|
48
|
+
# Each pattern should match ORIGINAL text (including "few", "a few")
|
|
49
|
+
TEMPORAL_PATTERNS = [
|
|
50
|
+
# "between X and Y" - date ranges
|
|
51
|
+
/\bbetween\s+(.+?)\s+and\s+(.+?)(?=\s+(?:about|regarding|for|on|with)|$)/i,
|
|
52
|
+
|
|
53
|
+
# "from X to Y" - date ranges
|
|
54
|
+
/\bfrom\s+(.+?)\s+to\s+(.+?)(?=\s+(?:about|regarding|for|on|with)|$)/i,
|
|
55
|
+
|
|
56
|
+
# "since X" - from date to now
|
|
57
|
+
/\bsince\s+(.+?)(?=\s+(?:about|regarding|for|on|with)|$)/i,
|
|
58
|
+
|
|
59
|
+
# "before/after X"
|
|
60
|
+
/\b(before|after)\s+(.+?)(?=\s+(?:about|regarding|for|on|with)|$)/i,
|
|
61
|
+
|
|
62
|
+
# "in the last/past X units" (including "few", "a few", "several")
|
|
63
|
+
/\bin\s+the\s+(?:last|past)\s+(?:\d+|few|a\s+few|several)\s+(?:#{TIME_UNITS})/i,
|
|
64
|
+
|
|
65
|
+
# "weekend before last" / "the weekend before last"
|
|
66
|
+
/\b(?:the\s+)?weekend\s+before\s+last\b/i,
|
|
67
|
+
|
|
68
|
+
# "N weekends ago" (numeric or written)
|
|
69
|
+
/\b(?:\d+|one|two|three|four|five|six|seven|eight|nine|ten|few|a\s+few|several)\s+weekends?\s+ago\b/i,
|
|
70
|
+
|
|
71
|
+
# "a few X ago" or "few X ago"
|
|
72
|
+
/\b(?:a\s+)?few\s+(?:#{TIME_UNITS})\s+ago\b/i,
|
|
73
|
+
|
|
74
|
+
# "X units ago"
|
|
75
|
+
/\b\d+\s+(?:#{TIME_UNITS})\s+ago\b/i,
|
|
76
|
+
|
|
77
|
+
# "last/this/next weekend"
|
|
78
|
+
/\b(?:last|this|next)\s+weekend\b/i,
|
|
79
|
+
|
|
80
|
+
# "last/this/next X" (week, month, year, monday, etc.)
|
|
81
|
+
/\b(?:last|this|next)\s+(?:week|month|year|monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b/i,
|
|
82
|
+
|
|
83
|
+
# "recently" or "recent" as standalone or with context
|
|
84
|
+
/\b(?:recently|recent)\b/i,
|
|
85
|
+
|
|
86
|
+
# Standard time words
|
|
87
|
+
/\b(?:yesterday|today|tonight|this\s+morning|this\s+afternoon|this\s+evening|last\s+night)\b/i,
|
|
88
|
+
].freeze
|
|
89
|
+
|
|
90
|
+
# Result structure for extracted timeframe
|
|
91
|
+
Result = Struct.new(:query, :timeframe, :original_expression, keyword_init: true)
|
|
92
|
+
|
|
93
|
+
class << self
|
|
94
|
+
# Extract timeframe from a query string
#
# Walks TEMPORAL_PATTERNS in order and uses the first pattern whose match
# can also be parsed into a timeframe. A pattern that matches but does not
# parse is skipped and the next pattern is tried.
#
# @param query [String] The query to parse
# @return [Result] Struct with :query (cleaned), :timeframe, :original_expression;
#   when nothing is found, :query is the input unchanged and the other fields are nil
#
def extract(query)
  nothing_found = Result.new(query: query, timeframe: nil, original_expression: nil)
  return nothing_found if query.nil? || query.strip.empty?

  # Patterns are matched against the ORIGINAL query text
  TEMPORAL_PATTERNS.each do |pattern|
    hit = pattern.match(query)
    next unless hit

    expression = hit[0].strip
    parsed = parse_expression(expression)
    next unless parsed

    return Result.new(
      query:               clean_query(query, expression),
      timeframe:           parsed,
      original_expression: expression
    )
  end

  nothing_found
end
|
|
124
|
+
|
|
125
|
+
# Check if query contains a temporal expression
#
# True when at least one entry of TEMPORAL_PATTERNS matches the query;
# nil and blank queries are never temporal.
#
# @param query [String] The query to check
# @return [Boolean]
#
def temporal?(query)
  blank = query.nil? || query.strip.empty?
  !blank && TEMPORAL_PATTERNS.any? { |regex| regex.match?(query) }
end
|
|
135
|
+
|
|
136
|
+
private
|
|
137
|
+
|
|
138
|
+
# Replace the vague quantifiers "a few", "few" and "several" with the
# numeric value of FEW
#
# The substitutions run in sequence, "a few" first so the article is
# consumed together with the keyword.
#
# @param text [String] Text to normalize
# @return [String] Text with the quantifiers replaced by FEW as a string
#
def normalize_few_keywords(text)
  replacement = FEW.to_s
  quantifiers = [/\ba\s+few\b/i, /\bfew\b/i, /\bseveral\b/i]

  quantifiers.reduce(text) { |current, quantifier| current.gsub(quantifier, replacement) }
end
|
|
149
|
+
|
|
150
|
+
# Parse a temporal expression into a timeframe
#
# Checks are applied in this order, and the first that applies wins:
# 1. "recent"/"recently"            -> parse_recent
# 2. "weekend before last"          -> parse_weekends_ago(2)
# 3. "N weekends ago"               -> parse_weekends_ago(N)
# 4. "[in the] last/past N units"   -> parse_last_x (after few/several are made numeric)
# 5. anything else                  -> Chronic, first as a span, then as a point in time
#
# @param expression [String] The temporal expression
# @return [Time, Range, nil] Parsed timeframe (nil when the final Chronic call returns nil)
#
def parse_expression(expression)
  return parse_recent if expression.match?(/\b(?:recently|recent)\b/i)
  return parse_weekends_ago(2) if expression.match?(/\bweekend\s+before\s+last\b/i)

  weekends = expression.match(/\b(\d+|one|two|three|four|five|six|seven|eight|nine|ten|few|a\s+few|several)\s+weekends?\s+ago\b/i)
  return parse_weekends_ago(parse_number(weekends[1])) if weekends

  # From here on, "few"/"a few"/"several" are replaced by a digit
  numeric = normalize_few_keywords(expression)

  window = numeric.match(/(?:in\s+the\s+)?(?:last|past)\s+(\d+)\s+(#{TIME_UNITS})/i)
  return parse_last_x(window[1].to_i, window[2]) if window

  # The "in the" prefix is removed before handing the phrase to Chronic
  phrase = numeric.gsub(/\bin\s+the\s+/i, '')

  # week_start comes from HTM configuration when available, otherwise :sunday
  week_start =
    if defined?(HTM) && HTM.respond_to?(:configuration)
      HTM.configuration.week_start
    else
      :sunday
    end

  # Ask for a span first; anything exposing begin/end becomes a Range
  span = Chronic.parse(phrase, guess: false, week_start: week_start)
  return span.begin..span.end if span.respond_to?(:begin) && span.respond_to?(:end)

  # Otherwise fall back to Chronic's default (guessing) parse
  Chronic.parse(phrase, week_start: week_start)
end
|
|
200
|
+
|
|
201
|
+
# Convert a count written as digits, a number word, or a vague quantifier
# into an Integer
#
# The input is lower-cased and stripped first. "few", "a few" and "several"
# map to FEW; words present in WORD_NUMBERS map to their value; anything
# else goes through String#to_i.
#
# @param str [String] Number as digit or word
# @return [Integer] Parsed number
#
def parse_number(str)
  token = str.downcase.strip
  return FEW if ['few', 'a few', 'several'].include?(token)

  WORD_NUMBERS.fetch(token) { token.to_i }
end
|
|
212
|
+
|
|
213
|
+
# Parse "N weekends ago" to a Saturday-Sunday range
#
# The reference Saturday is the most recent Saturday on or before the
# calendar day of +now+; +count+ then steps back (count - 1) further weeks.
# NOTE: as a consequence, when +now+ falls on a Saturday or Sunday,
# count = 1 resolves to the weekend containing +now+.
#
# Day arithmetic is anchored at local noon so that a one-hour DST shift
# between +now+ and the target weekend cannot move the result onto the
# wrong calendar day; both bounds are then rebuilt at local midnight.
#
# @param count [Integer] Number of weekends ago (1 = last weekend)
# @param now [Time] Reference moment (defaults to the current time)
# @return [Range] Time range for that weekend (Saturday 00:00 to Monday 00:00)
#
def parse_weekends_ago(count, now: Time.now)
  seconds_per_day = 24 * 60 * 60

  # Time#wday: Sunday = 0 ... Saturday = 6, so this is 0 on a Saturday,
  # 1 on a Sunday, ... 6 on a Friday.
  days_since_saturday = (now.wday - 6) % 7
  days_back = days_since_saturday + ((count - 1) * 7)

  noon_today    = Time.new(now.year, now.month, now.day, 12, 0, 0)
  saturday_noon = noon_today - (days_back * seconds_per_day)
  monday_noon   = saturday_noon + (2 * seconds_per_day)

  weekend_start = Time.new(saturday_noon.year, saturday_noon.month, saturday_noon.day, 0, 0, 0)
  weekend_end   = Time.new(monday_noon.year, monday_noon.month, monday_noon.day, 0, 0, 0)

  weekend_start..weekend_end
end
|
|
238
|
+
|
|
239
|
+
# Turn "last/past N units" into a range ending at the current time
#
# The unit is lower-cased and a trailing "s" removed before lookup. Months
# count as 30 days and years as 365 days; an unrecognized unit is treated
# as days.
#
# @param count [Integer] Number of units
# @param unit [String] Time unit (days, hours, etc.)
# @return [Range] Time range from count units ago to now
#
def parse_last_x(count, unit)
  reference = Time.now
  singular = unit.downcase.sub(/s$/, '')

  seconds_per_unit = {
    'second' => 1,
    'minute' => 60,
    'hour'   => 60 * 60,
    'day'    => 24 * 60 * 60,
    'week'   => 7 * 24 * 60 * 60,
    'month'  => 30 * 24 * 60 * 60,
    'year'   => 365 * 24 * 60 * 60
  }

  span = count * seconds_per_unit.fetch(singular, 24 * 60 * 60)
  (reference - span)..reference
end
|
|
262
|
+
|
|
263
|
+
# Build the range meant by "recently": FEW units of DEFAULT_RECENT_UNIT
# back from the current time
#
# Months count as 30 days and years as 365 days; an unrecognized
# DEFAULT_RECENT_UNIT is treated as days.
#
# @return [Range] Time range ending at the current time
#
def parse_recent
  seconds_per_unit = {
    seconds: 1,
    minutes: 60,
    hours:   60 * 60,
    days:    24 * 60 * 60,
    weeks:   7 * 24 * 60 * 60,
    months:  30 * 24 * 60 * 60,
    years:   365 * 24 * 60 * 60
  }

  upper = Time.now
  lower = upper - (FEW * seconds_per_unit.fetch(DEFAULT_RECENT_UNIT, 24 * 60 * 60))
  lower..upper
end
|
|
288
|
+
|
|
289
|
+
# Strip a temporal expression out of a query and tidy the leftover text
#
# Only the first occurrence of the expression is removed (matched literally,
# case-insensitively). Afterwards runs of whitespace are collapsed to one
# space, whitespace before a comma or period is dropped, and the result is
# trimmed.
#
# @param query [String] Original query
# @param expression [String] Expression to remove
# @return [String] Cleaned query
#
def clean_query(query, expression)
  literal = Regexp.new(Regexp.escape(expression), Regexp::IGNORECASE)

  without_expression = query.sub(literal, '')
  single_spaced = without_expression.gsub(/\s{2,}/, ' ')
  single_spaced.gsub(/\s+([,.])/, '\1').strip
end
|
|
305
|
+
end
|
|
306
|
+
end
|
|
307
|
+
end
|
data/lib/htm/version.rb
CHANGED