cli-community-intelligence 0.1.17 → 0.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -5680,6 +5680,7 @@ var queryPosts = (db, { source, channel, text, author, since, until, minScore, l
5680
5680
  const sql = `SELECT * FROM posts ${where} ORDER BY createdAt DESC ${limitClause}`;
5681
5681
  return db.prepare(sql).all(...params);
5682
5682
  };
5683
+ var querySql = (db, sql) => db.prepare(sql).all();
5683
5684
  var CorpusDb = {
5684
5685
  createDb,
5685
5686
  persistPost,
@@ -5692,6 +5693,7 @@ var CorpusDb = {
5692
5693
  queryPostBySourceId,
5693
5694
  queryRevisitQueue,
5694
5695
  queryScrapeRuns,
5696
+ querySql,
5695
5697
  updateQueueEntry,
5696
5698
  removeFromQueue,
5697
5699
  countChildPosts
@@ -5819,11 +5821,11 @@ var T2 = {
5819
5821
  return cutoff.toISOString();
5820
5822
  },
5821
5823
  // Determine the cutoff date for a subreddit based on incremental/backfill settings
5822
- // @sig toCutoffForSubreddit :: (String, Boolean, Number?, Function?) -> String?
5824
+ // WHY: default to 30 days so bare `scrape reddit` has predictable scope
5825
+ // @sig toCutoffForSubreddit :: (String, Boolean, Number?, Function?) -> String
5823
5826
  toCutoffForSubreddit: (subreddit, incremental, backfillDays, getLastCreatedAt) => {
5824
5827
  if (incremental && getLastCreatedAt) return getLastCreatedAt("reddit", subreddit);
5825
- if (backfillDays) return T2.toCutoffDate(backfillDays);
5826
- return void 0;
5828
+ return T2.toCutoffDate(backfillDays || DEFAULT_BACKFILL_DAYS);
5827
5829
  },
5828
5830
  // Build a scrape configuration object for a single subreddit
5829
5831
  // @sig toScrapeConfig :: (Object, String, Boolean, Number?, Function?) -> Object
@@ -5942,6 +5944,7 @@ var POSTS_PER_PAGE_UNAUTH = 25;
5942
5944
  var RATE_LIMIT_DELAY_AUTH = 700;
5943
5945
  var RATE_LIMIT_DELAY_UNAUTH2 = RedditApi.RATE_LIMIT_DELAY_UNAUTH;
5944
5946
  var MAX_POSTS_PER_SUBREDDIT = 1e3;
5947
+ var DEFAULT_BACKFILL_DAYS = 30;
5945
5948
  var gatherReddit = async ({ subreddits, backfillDays, incremental, getLastCreatedAt }) => {
5946
5949
  const subredditList = RedditApi.toSubredditList(subreddits);
5947
5950
  const auth = await E.requestToken();
@@ -6346,6 +6349,14 @@ var E3 = {
6346
6349
  E3.emitStatus(CorpusDb.queryStatus(db));
6347
6350
  db.close();
6348
6351
  },
6352
+ // Handle sql command — run a read-only SQL query against the corpus
6353
+ // @sig handleSql :: Object -> void
6354
+ handleSql: (argv) => {
6355
+ const db = CorpusDb.createDb(argv.db);
6356
+ const rows = CorpusDb.querySql(db, argv.sql);
6357
+ console.log(JSON.stringify(rows, void 0, 2));
6358
+ db.close();
6359
+ },
6349
6360
  // Handle query command — output matching posts as formatted text
6350
6361
  // @sig handleQuery :: Object -> void
6351
6362
  handleQuery: (argv) => {
@@ -6416,8 +6427,9 @@ ${posts.length} post(s) found
6416
6427
  () => posts.forEach((post) => E3.persistPostRevisitEntry(db, post, moreCounts[post.sourceId] || 0))
6417
6428
  );
6418
6429
  enqueueAll();
6430
+ const dupes = allPosts.length - inserted;
6419
6431
  console.log(`
6420
- Stored ${inserted} new posts (${allPosts.length - inserted} duplicates skipped)`);
6432
+ Stored ${posts.length} posts and ${comments.length} comments (${dupes} duplicates skipped)`);
6421
6433
  console.log(`Queued ${posts.length} posts for revisit`);
6422
6434
  db.close();
6423
6435
  },
@@ -6477,7 +6489,7 @@ Stored ${inserted} new posts (${posts.length - inserted} duplicates skipped)`);
6477
6489
  // Run the CLI — parse args and dispatch to handlers
6478
6490
  // @sig runCli :: () -> void
6479
6491
  runCli: () => {
6480
- const { handleExport, handleFatalError, handleQuery, handleRevisit, handleScrape, handleStatus } = E3;
6492
+ const { handleExport, handleFatalError, handleQuery, handleRevisit, handleScrape, handleSql, handleStatus } = E3;
6481
6493
  const { buildFilterOptions, buildRevisitOptions, buildScrapeOptions } = F3;
6482
6494
  try {
6483
6495
  yargs_default(hideBin(process.argv)).usage("Usage: community-intelligence <command> [options]").option("db", {
@@ -6489,7 +6501,12 @@ Stored ${inserted} new posts (${posts.length - inserted} duplicates skipped)`);
6489
6501
  "Re-fetch queued posts for new comments",
6490
6502
  buildRevisitOptions,
6491
6503
  handleRevisit
6492
- ).command("query", "Search the corpus", buildFilterOptions, handleQuery).command("export", "Export matching posts as JSON to stdout", buildFilterOptions, handleExport).demandCommand(1, "Please specify a command").help().alias("help", "h").version().alias("version", "v").strict().parse();
6504
+ ).command("query", "Search the corpus", buildFilterOptions, handleQuery).command("export", "Export matching posts as JSON to stdout", buildFilterOptions, handleExport).command(
6505
+ "sql <sql>",
6506
+ "Run a read-only SQL query against the corpus",
6507
+ (y) => y.positional("sql", { describe: "SQL query to execute", type: "string" }),
6508
+ handleSql
6509
+ ).demandCommand(1, "Please specify a command").help().alias("help", "h").version().alias("version", "v").strict().parse();
6493
6510
  } catch (error) {
6494
6511
  handleFatalError(error);
6495
6512
  }
package/dist/index.js CHANGED
@@ -823,6 +823,7 @@ var queryPosts = (db, { source, channel, text, author, since, until, minScore, l
823
823
  const sql = `SELECT * FROM posts ${where} ORDER BY createdAt DESC ${limitClause}`;
824
824
  return db.prepare(sql).all(...params);
825
825
  };
826
+ var querySql = (db, sql) => db.prepare(sql).all();
826
827
  var CorpusDb = {
827
828
  createDb,
828
829
  persistPost,
@@ -835,6 +836,7 @@ var CorpusDb = {
835
836
  queryPostBySourceId,
836
837
  queryRevisitQueue,
837
838
  queryScrapeRuns,
839
+ querySql,
838
840
  updateQueueEntry,
839
841
  removeFromQueue,
840
842
  countChildPosts
@@ -122,5 +122,16 @@ export default definePluginEntry({
122
122
  parameters: queryFilterParams,
123
123
  execute: F.buildExecutor('export'),
124
124
  })
125
+
126
+ api.registerTool({
127
+ name: 'community-intelligence-sql',
128
+ description:
129
+ 'Run a read-only SQL query against the community intelligence corpus. ' +
130
+ 'The database has a "posts" table with columns: source, sourceId, channel, ' +
131
+ 'author, title, body, parentId, url, score, createdAt, postType, scrapedAt. ' +
132
+ 'Use standard SQLite syntax for aggregations, grouping, etc.',
133
+ parameters: Type.Object({ sql: Type.String({ description: 'SQL query to execute (read-only)' }) }),
134
+ execute: F.buildExecutor('sql'),
135
+ })
125
136
  },
126
137
  })
@@ -2,18 +2,16 @@
2
2
  "id": "cli-community-intelligence",
3
3
  "name": "Community Intelligence",
4
4
  "description": "Scrape and query posts from online communities (Reddit, YouTube)",
5
- "version": "0.1.17",
5
+ "version": "0.1.19",
6
6
  "contracts": {
7
7
  "tools": [
8
8
  "community-intelligence-scrape",
9
9
  "community-intelligence-query",
10
10
  "community-intelligence-status",
11
- "community-intelligence-export"
11
+ "community-intelligence-export",
12
+ "community-intelligence-sql"
12
13
  ]
13
14
  },
14
- "capabilities": {
15
- "childProcess": true
16
- },
17
15
  "configSchema": {
18
16
  "type": "object",
19
17
  "additionalProperties": false,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cli-community-intelligence",
3
- "version": "0.1.17",
3
+ "version": "0.1.19",
4
4
  "description": "Community intelligence scraper for construction industry market research",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -20,8 +20,11 @@
20
20
  "minGatewayVersion": "2026.3.28"
21
21
  }
22
22
  },
23
+ "scripts": {
24
+ "postinstall": "npm rebuild better-sqlite3"
25
+ },
23
26
  "dependencies": {
24
- "better-sqlite3": "^8.7.0",
27
+ "better-sqlite3": "^11.0.0",
25
28
  "@sinclair/typebox": "0.34.49"
26
29
  }
27
30
  }