cli-community-intelligence 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,17 @@
1
+ {
2
+ "name": "cli-community-intelligence",
3
+ "version": "0.1.5",
4
+ "description": "Community intelligence scraper for construction industry market research",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "bin": {
8
+ "community-intelligence": "dist/cli.js"
9
+ },
10
+ "repository": {
11
+ "type": "git",
12
+ "url": "https://github.com/graffio/graffio-monorepo.git"
13
+ },
14
+ "dependencies": {
15
+ "better-sqlite3": "^8.7.0"
16
+ }
17
+ }
package/schema.sql ADDED
@@ -0,0 +1,66 @@
1
+ -- =====================================================
2
+ -- Community Intelligence Corpus Schema
3
+ -- =====================================================
4
+ --
5
+ -- Stores scraped posts and comments from online communities.
6
+ -- Column names use camelCase to match JavaScript conventions; table names use snake_case.
7
+ --
8
+
9
+ CREATE TABLE IF NOT EXISTS posts (
10
+ id INTEGER PRIMARY KEY AUTOINCREMENT, -- Local surrogate key; AUTOINCREMENT keeps SQLite from reusing ids of deleted rows
11
+ source TEXT NOT NULL, -- 'reddit', 'youtube', 'forum' (documented values only; no CHECK enforces them)
12
+ sourceId TEXT NOT NULL, -- Original ID from source platform
13
+ channel TEXT NOT NULL, -- Subreddit, YouTube channel, forum name
14
+ author TEXT, -- Username (may be null if deleted)
15
+ title TEXT, -- Post title (null for comments)
16
+ body TEXT NOT NULL, -- Post or comment text content
17
+ parentId TEXT, -- Parent post sourceId (for comments); plain TEXT, not declared as a foreign key
18
+ url TEXT, -- Permalink
19
+ score INTEGER, -- Upvotes, likes, etc.
20
+ createdAt TEXT NOT NULL, -- ISO 8601 timestamp from source, stored as TEXT
21
+ scrapedAt TEXT NOT NULL DEFAULT (datetime('now')), -- When we scraped it; default is SQLite datetime('now'): UTC, 'YYYY-MM-DD HH:MM:SS'. NOTE(review): differs from a 'T'-separated ISO 8601 string — confirm before comparing with createdAt as text
22
+ postType TEXT NOT NULL CHECK (postType IN ('post', 'comment')) -- Row kind; any value other than 'post' or 'comment' is rejected
23
+ );
24
+
25
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_posts_source_sourceId ON posts(source, sourceId); -- Enforces at most one row per (source, sourceId) pair
26
+ CREATE INDEX IF NOT EXISTS idx_posts_channel ON posts(channel); -- Lookup by channel alone, without constraining source
27
+ CREATE INDEX IF NOT EXISTS idx_posts_createdAt ON posts(createdAt); -- createdAt is TEXT, so index order is string order
28
+ CREATE INDEX IF NOT EXISTS idx_posts_source_channel_createdAt ON posts(source, channel, createdAt DESC); -- Presumably serves newest-first reads within one (source, channel) — confirm against queries
29
+
30
+ -- =====================================================
31
+ -- Scrape Runs — logs each scrape operation for analytics
32
+ -- =====================================================
33
+
34
+ CREATE TABLE IF NOT EXISTS scrape_runs (
35
+ id INTEGER PRIMARY KEY AUTOINCREMENT, -- Local surrogate key for the run
36
+ source TEXT NOT NULL, -- 'reddit', 'youtube', 'forum' (documented values only; no CHECK enforces them)
37
+ channel TEXT NOT NULL, -- Subreddit, YouTube channel, forum name
38
+ startedAt TEXT NOT NULL, -- ISO 8601 timestamp
39
+ completedAt TEXT NOT NULL, -- ISO 8601 timestamp; NOT NULL with no default, so a row needs a completion time when inserted
40
+ postsFound INTEGER NOT NULL DEFAULT 0, -- Presumably posts seen during the run — confirm against the scraper
41
+ postsInserted INTEGER NOT NULL DEFAULT 0, -- Presumably posts newly stored during the run — confirm against the scraper
42
+ commentsFound INTEGER NOT NULL DEFAULT 0, -- Presumably comments seen during the run — confirm against the scraper
43
+ commentsInserted INTEGER NOT NULL DEFAULT 0 -- Presumably comments newly stored during the run — confirm against the scraper
44
+ );
45
+
46
+ CREATE INDEX IF NOT EXISTS idx_scrape_runs_source_channel ON scrape_runs(source, channel); -- Non-unique: multiple runs may share the same (source, channel)
47
+
48
+ -- =====================================================
49
+ -- Revisit Queue — tracks posts needing follow-up scrapes
50
+ -- =====================================================
51
+ --
52
+ -- One entry per post. When several reasons apply, reason holds the most specific one, in priority order: has_more > high_score > recent.
53
+ -- Exit conditions (OR): post age > threshold OR unchangedCount > threshold.
54
+ --
55
+
56
+ CREATE TABLE IF NOT EXISTS revisit_queue (
57
+ id INTEGER PRIMARY KEY AUTOINCREMENT, -- Local surrogate key for the queue entry
58
+ source TEXT NOT NULL, -- 'reddit', 'youtube', 'forum' (documented values only; no CHECK enforces them)
59
+ sourceId TEXT NOT NULL, -- Post sourceId from the posts table (no foreign key is declared)
60
+ reason TEXT NOT NULL CHECK (reason IN ('recent', 'high_score', 'has_more')), -- Why the post is queued; exactly one of the three listed values
61
+ moreCount INTEGER NOT NULL DEFAULT 0, -- Count of "more" placeholder objects in comment tree
62
+ lastCheckedAt TEXT, -- ISO 8601 timestamp of last revisit; nullable (presumably NULL until the first revisit — confirm)
63
+ unchangedCount INTEGER NOT NULL DEFAULT 0 -- Consecutive revisits with zero new comments
64
+ );
65
+
66
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_revisit_queue_source_sourceId ON revisit_queue(source, sourceId); -- Enforces at most one queue entry per (source, sourceId)