cli-community-intelligence 0.1.18 → 0.1.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +23 -6
- package/dist/index.js +2 -0
- package/openclaw-extension.js +18 -1
- package/openclaw.plugin.json +3 -2
- package/package.json +7 -3
package/dist/cli.js
CHANGED
|
@@ -5680,6 +5680,7 @@ var queryPosts = (db, { source, channel, text, author, since, until, minScore, l
|
|
|
5680
5680
|
const sql = `SELECT * FROM posts ${where} ORDER BY createdAt DESC ${limitClause}`;
|
|
5681
5681
|
return db.prepare(sql).all(...params);
|
|
5682
5682
|
};
|
|
5683
|
+
var querySql = (db, sql) => db.prepare(sql).all();
|
|
5683
5684
|
var CorpusDb = {
|
|
5684
5685
|
createDb,
|
|
5685
5686
|
persistPost,
|
|
@@ -5692,6 +5693,7 @@ var CorpusDb = {
|
|
|
5692
5693
|
queryPostBySourceId,
|
|
5693
5694
|
queryRevisitQueue,
|
|
5694
5695
|
queryScrapeRuns,
|
|
5696
|
+
querySql,
|
|
5695
5697
|
updateQueueEntry,
|
|
5696
5698
|
removeFromQueue,
|
|
5697
5699
|
countChildPosts
|
|
@@ -5819,11 +5821,11 @@ var T2 = {
|
|
|
5819
5821
|
return cutoff.toISOString();
|
|
5820
5822
|
},
|
|
5821
5823
|
// Determine the cutoff date for a subreddit based on incremental/backfill settings
|
|
5822
|
-
//
|
|
5824
|
+
// WHY: default to 30 days so bare `scrape reddit` has predictable scope
|
|
5825
|
+
// @sig toCutoffForSubreddit :: (String, Boolean, Number?, Function?) -> String
|
|
5823
5826
|
toCutoffForSubreddit: (subreddit, incremental, backfillDays, getLastCreatedAt) => {
|
|
5824
5827
|
if (incremental && getLastCreatedAt) return getLastCreatedAt("reddit", subreddit);
|
|
5825
|
-
|
|
5826
|
-
return void 0;
|
|
5828
|
+
return T2.toCutoffDate(backfillDays || DEFAULT_BACKFILL_DAYS);
|
|
5827
5829
|
},
|
|
5828
5830
|
// Build a scrape configuration object for a single subreddit
|
|
5829
5831
|
// @sig toScrapeConfig :: (Object, String, Boolean, Number?, Function?) -> Object
|
|
@@ -5942,6 +5944,7 @@ var POSTS_PER_PAGE_UNAUTH = 25;
|
|
|
5942
5944
|
var RATE_LIMIT_DELAY_AUTH = 700;
|
|
5943
5945
|
var RATE_LIMIT_DELAY_UNAUTH2 = RedditApi.RATE_LIMIT_DELAY_UNAUTH;
|
|
5944
5946
|
var MAX_POSTS_PER_SUBREDDIT = 1e3;
|
|
5947
|
+
var DEFAULT_BACKFILL_DAYS = 30;
|
|
5945
5948
|
var gatherReddit = async ({ subreddits, backfillDays, incremental, getLastCreatedAt }) => {
|
|
5946
5949
|
const subredditList = RedditApi.toSubredditList(subreddits);
|
|
5947
5950
|
const auth = await E.requestToken();
|
|
@@ -6346,6 +6349,14 @@ var E3 = {
|
|
|
6346
6349
|
E3.emitStatus(CorpusDb.queryStatus(db));
|
|
6347
6350
|
db.close();
|
|
6348
6351
|
},
|
|
6352
|
+
// Handle sql command — run a read-only SQL query against the corpus
|
|
6353
|
+
// @sig handleSql :: Object -> void
|
|
6354
|
+
handleSql: (argv) => {
|
|
6355
|
+
const db = CorpusDb.createDb(argv.db);
|
|
6356
|
+
const rows = CorpusDb.querySql(db, argv.sql);
|
|
6357
|
+
console.log(JSON.stringify(rows, void 0, 2));
|
|
6358
|
+
db.close();
|
|
6359
|
+
},
|
|
6349
6360
|
// Handle query command — output matching posts as formatted text
|
|
6350
6361
|
// @sig handleQuery :: Object -> void
|
|
6351
6362
|
handleQuery: (argv) => {
|
|
@@ -6416,8 +6427,9 @@ ${posts.length} post(s) found
|
|
|
6416
6427
|
() => posts.forEach((post) => E3.persistPostRevisitEntry(db, post, moreCounts[post.sourceId] || 0))
|
|
6417
6428
|
);
|
|
6418
6429
|
enqueueAll();
|
|
6430
|
+
const dupes = allPosts.length - inserted;
|
|
6419
6431
|
console.log(`
|
|
6420
|
-
Stored ${inserted} new posts (${posts.length - inserted} duplicates skipped)`);
|
|
6432
|
+
Stored ${posts.length} posts and ${comments.length} comments (${dupes} duplicates skipped)`);
|
|
6421
6433
|
console.log(`Queued ${posts.length} posts for revisit`);
|
|
6422
6434
|
db.close();
|
|
6423
6435
|
},
|
|
@@ -6477,7 +6489,7 @@ Stored ${inserted} new posts (${posts.length - inserted} duplicates skipped)`);
|
|
|
6477
6489
|
// Run the CLI — parse args and dispatch to handlers
|
|
6478
6490
|
// @sig runCli :: () -> void
|
|
6479
6491
|
runCli: () => {
|
|
6480
|
-
const { handleExport, handleFatalError, handleQuery, handleRevisit, handleScrape, handleStatus } = E3;
|
|
6492
|
+
const { handleExport, handleFatalError, handleQuery, handleRevisit, handleScrape, handleSql, handleStatus } = E3;
|
|
6481
6493
|
const { buildFilterOptions, buildRevisitOptions, buildScrapeOptions } = F3;
|
|
6482
6494
|
try {
|
|
6483
6495
|
yargs_default(hideBin(process.argv)).usage("Usage: community-intelligence <command> [options]").option("db", {
|
|
@@ -6489,7 +6501,12 @@ Stored ${inserted} new posts (${posts.length - inserted} duplicates skipped)`);
|
|
|
6489
6501
|
"Re-fetch queued posts for new comments",
|
|
6490
6502
|
buildRevisitOptions,
|
|
6491
6503
|
handleRevisit
|
|
6492
|
-
).command("query", "Search the corpus", buildFilterOptions, handleQuery).command("export", "Export matching posts as JSON to stdout", buildFilterOptions, handleExport).
|
|
6504
|
+
).command("query", "Search the corpus", buildFilterOptions, handleQuery).command("export", "Export matching posts as JSON to stdout", buildFilterOptions, handleExport).command(
|
|
6505
|
+
"sql <sql>",
|
|
6506
|
+
"Run a read-only SQL query against the corpus",
|
|
6507
|
+
(y) => y.positional("sql", { describe: "SQL query to execute", type: "string" }),
|
|
6508
|
+
handleSql
|
|
6509
|
+
).demandCommand(1, "Please specify a command").help().alias("help", "h").version().alias("version", "v").strict().parse();
|
|
6493
6510
|
} catch (error) {
|
|
6494
6511
|
handleFatalError(error);
|
|
6495
6512
|
}
|
package/dist/index.js
CHANGED
|
@@ -823,6 +823,7 @@ var queryPosts = (db, { source, channel, text, author, since, until, minScore, l
|
|
|
823
823
|
const sql = `SELECT * FROM posts ${where} ORDER BY createdAt DESC ${limitClause}`;
|
|
824
824
|
return db.prepare(sql).all(...params);
|
|
825
825
|
};
|
|
826
|
+
var querySql = (db, sql) => db.prepare(sql).all();
|
|
826
827
|
var CorpusDb = {
|
|
827
828
|
createDb,
|
|
828
829
|
persistPost,
|
|
@@ -835,6 +836,7 @@ var CorpusDb = {
|
|
|
835
836
|
queryPostBySourceId,
|
|
836
837
|
queryRevisitQueue,
|
|
837
838
|
queryScrapeRuns,
|
|
839
|
+
querySql,
|
|
838
840
|
updateQueueEntry,
|
|
839
841
|
removeFromQueue,
|
|
840
842
|
countChildPosts
|
package/openclaw-extension.js
CHANGED
|
@@ -75,10 +75,15 @@ const queryFilterParams = Type.Object({
|
|
|
75
75
|
//
|
|
76
76
|
// ---------------------------------------------------------------------------------------------------------------------
|
|
77
77
|
|
|
78
|
+
// COMPLEXITY: export-structure — OpenClaw's definePluginEntry requires export default
|
|
79
|
+
// @sig OpenClawExtension :: OpenClawPluginDefinition
|
|
78
80
|
export default definePluginEntry({
|
|
79
81
|
id: 'cli-community-intelligence',
|
|
80
82
|
name: 'Community Intelligence',
|
|
81
83
|
description: 'Scrape and query posts from online communities (Reddit, YouTube)',
|
|
84
|
+
|
|
85
|
+
// Register community-intelligence tools with OpenClaw's plugin API
|
|
86
|
+
// @sig register :: OpenClawPluginApi -> void
|
|
82
87
|
register(api) {
|
|
83
88
|
api.registerTool({
|
|
84
89
|
name: 'community-intelligence-scrape',
|
|
@@ -104,7 +109,8 @@ export default definePluginEntry({
|
|
|
104
109
|
api.registerTool({
|
|
105
110
|
name: 'community-intelligence-query',
|
|
106
111
|
description:
|
|
107
|
-
'Search the scraped community posts corpus.',
|
|
112
|
+
'Search the scraped community posts corpus. ' +
|
|
113
|
+
'Filter by source, channel, text, author, date range, or score.',
|
|
108
114
|
parameters: queryFilterParams,
|
|
109
115
|
execute: F.buildExecutor('query'),
|
|
110
116
|
})
|
|
@@ -122,5 +128,16 @@ export default definePluginEntry({
|
|
|
122
128
|
parameters: queryFilterParams,
|
|
123
129
|
execute: F.buildExecutor('export'),
|
|
124
130
|
})
|
|
131
|
+
|
|
132
|
+
api.registerTool({
|
|
133
|
+
name: 'community-intelligence-sql',
|
|
134
|
+
description:
|
|
135
|
+
'Run a read-only SQL query against the community intelligence corpus. ' +
|
|
136
|
+
'The database has a "posts" table with columns: source, sourceId, channel, ' +
|
|
137
|
+
'author, title, body, parentId, url, score, createdAt, postType, scrapedAt. ' +
|
|
138
|
+
'Use standard SQLite syntax for aggregations, grouping, etc.',
|
|
139
|
+
parameters: Type.Object({ sql: Type.String({ description: 'SQL query to execute (read-only)' }) }),
|
|
140
|
+
execute: F.buildExecutor('sql'),
|
|
141
|
+
})
|
|
125
142
|
},
|
|
126
143
|
})
|
package/openclaw.plugin.json
CHANGED
|
@@ -2,13 +2,14 @@
|
|
|
2
2
|
"id": "cli-community-intelligence",
|
|
3
3
|
"name": "Community Intelligence",
|
|
4
4
|
"description": "Scrape and query posts from online communities (Reddit, YouTube)",
|
|
5
|
-
"version": "0.1.18",
|
|
5
|
+
"version": "0.1.20",
|
|
6
6
|
"contracts": {
|
|
7
7
|
"tools": [
|
|
8
8
|
"community-intelligence-scrape",
|
|
9
9
|
"community-intelligence-query",
|
|
10
10
|
"community-intelligence-status",
|
|
11
|
-
"community-intelligence-export"
|
|
11
|
+
"community-intelligence-export",
|
|
12
|
+
"community-intelligence-sql"
|
|
12
13
|
]
|
|
13
14
|
},
|
|
14
15
|
"configSchema": {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "cli-community-intelligence",
|
|
3
|
-
"version": "0.1.18",
|
|
3
|
+
"version": "0.1.20",
|
|
4
4
|
"description": "Community intelligence scraper for construction industry market research",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -21,7 +21,11 @@
|
|
|
21
21
|
}
|
|
22
22
|
},
|
|
23
23
|
"dependencies": {
|
|
24
|
-
"better-sqlite3": "^11.0.0",
|
|
25
24
|
"@sinclair/typebox": "0.34.49"
|
|
26
|
-
}
|
|
25
|
+
},
|
|
26
|
+
"bundledDependencies": [
|
|
27
|
+
"better-sqlite3",
|
|
28
|
+
"bindings",
|
|
29
|
+
"file-uri-to-path"
|
|
30
|
+
]
|
|
27
31
|
}
|