factorio-ai-tools 1.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- factorio_ai_tools-1.1.1/.agents/AGENTS.md +11 -0
- factorio_ai_tools-1.1.1/.gitattributes +32 -0
- factorio_ai_tools-1.1.1/.github/release-drafter.yml +28 -0
- factorio_ai_tools-1.1.1/.github/workflows/docker-publish.yml +46 -0
- factorio_ai_tools-1.1.1/.github/workflows/publish-databases.yml +27 -0
- factorio_ai_tools-1.1.1/.github/workflows/pypi-publish.yml +27 -0
- factorio_ai_tools-1.1.1/.github/workflows/release-drafter.yml +22 -0
- factorio_ai_tools-1.1.1/.gitignore +15 -0
- factorio_ai_tools-1.1.1/CLAUDE.md +74 -0
- factorio_ai_tools-1.1.1/Dockerfile +19 -0
- factorio_ai_tools-1.1.1/LICENSE +21 -0
- factorio_ai_tools-1.1.1/Makefile +19 -0
- factorio_ai_tools-1.1.1/PKG-INFO +137 -0
- factorio_ai_tools-1.1.1/README.md +118 -0
- factorio_ai_tools-1.1.1/docs/assets/factorio-ai-tools.png +0 -0
- factorio_ai_tools-1.1.1/maintenance/compact_lancedb.py +168 -0
- factorio_ai_tools-1.1.1/maintenance/hooks/pre-push +42 -0
- factorio_ai_tools-1.1.1/make.bat +16 -0
- factorio_ai_tools-1.1.1/pyproject.toml +32 -0
- factorio_ai_tools-1.1.1/requirements.txt +76 -0
- factorio_ai_tools-1.1.1/setup.cfg +4 -0
- factorio_ai_tools-1.1.1/src/factorio_ai_tools/__init__.py +1 -0
- factorio_ai_tools-1.1.1/src/factorio_ai_tools/ingest/__init__.py +0 -0
- factorio_ai_tools-1.1.1/src/factorio_ai_tools/ingest/compact_db.py +27 -0
- factorio_ai_tools-1.1.1/src/factorio_ai_tools/ingest/ingest_clusterio.py +0 -0
- factorio_ai_tools-1.1.1/src/factorio_ai_tools/ingest/ingest_factorio.py +0 -0
- factorio_ai_tools-1.1.1/src/factorio_ai_tools/ingest/ingest_forum.py +0 -0
- factorio_ai_tools-1.1.1/src/factorio_ai_tools/ingest/ingest_github_repo.py +263 -0
- factorio_ai_tools-1.1.1/src/factorio_ai_tools/ingest/ingest_wiki.py +0 -0
- factorio_ai_tools-1.1.1/src/factorio_ai_tools/server.py +544 -0
- factorio_ai_tools-1.1.1/src/factorio_ai_tools.egg-info/PKG-INFO +137 -0
- factorio_ai_tools-1.1.1/src/factorio_ai_tools.egg-info/SOURCES.txt +36 -0
- factorio_ai_tools-1.1.1/src/factorio_ai_tools.egg-info/dependency_links.txt +1 -0
- factorio_ai_tools-1.1.1/src/factorio_ai_tools.egg-info/entry_points.txt +2 -0
- factorio_ai_tools-1.1.1/src/factorio_ai_tools.egg-info/requires.txt +9 -0
- factorio_ai_tools-1.1.1/src/factorio_ai_tools.egg-info/top_level.txt +1 -0
- factorio_ai_tools-1.1.1/start_mcp_server.bat +4 -0
- factorio_ai_tools-1.1.1/uv.lock +2176 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
## Python Windows Terminal Printing
|
|
2
|
+
When writing Python scripts that scrape the web or process LLM responses, NEVER directly `print()` raw dynamic strings. Windows PowerShell default encodings will throw a fatal `UnicodeEncodeError` when encountering characters like en-dashes or emojis.
|
|
3
|
+
*Fix:* Always encode/decode before printing:
|
|
4
|
+
```python
|
|
5
|
+
print(text.encode('ascii', 'replace').decode('ascii'))
|
|
6
|
+
```
|
|
7
|
+
|
|
8
|
+
## Comprehensive Git Commits
|
|
9
|
+
When tasked with committing and pushing code to a repository, NEVER assume you have tracked all necessary files based on memory.
|
|
10
|
+
- Before confirming to the user that "all changes have been pushed", you MUST run `git status` to explicitly verify that no unexpectedly modified or untracked files were left behind in the working directory.
|
|
11
|
+
- Ensure that you either use `git add .` (if safe) or meticulously stage every modified file related to the current task before committing.
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Make line endings deterministic across machines, editors, and CI, so the
|
|
2
|
+
# repository stops drifting between CRLF and LF (and Git stops warning that
|
|
3
|
+
# "LF will be replaced by CRLF"). This file is authoritative and overrides
|
|
4
|
+
# each developer's personal `core.autocrlf` setting for this project.
|
|
5
|
+
|
|
6
|
+
# Default: let Git auto-detect text vs binary. Text is stored as LF in the repo.
|
|
7
|
+
* text=auto
|
|
8
|
+
|
|
9
|
+
# Source and config: enforce LF in the working tree too (not just in the repo),
|
|
10
|
+
# so the checked-out files always match what is committed. The repo is already
|
|
11
|
+
# all-LF, so this introduces no content churn.
|
|
12
|
+
*.py text eol=lf
|
|
13
|
+
*.md text eol=lf
|
|
14
|
+
*.txt text eol=lf
|
|
15
|
+
*.yaml text eol=lf
|
|
16
|
+
*.yml text eol=lf
|
|
17
|
+
*.cfg text eol=lf
|
|
18
|
+
*.toml text eol=lf
|
|
19
|
+
*.json text eol=lf
|
|
20
|
+
*.sh text eol=lf
|
|
21
|
+
.gitignore text eol=lf
|
|
22
|
+
.gitattributes text eol=lf
|
|
23
|
+
|
|
24
|
+
# Git hooks are run by sh and must keep LF endings (no file extension, so the
|
|
25
|
+
# rules above don't catch them); CRLF would break them on Windows checkout.
|
|
26
|
+
maintenance/hooks/* text eol=lf
|
|
27
|
+
|
|
28
|
+
# Pre-built LanceDB vector stores are binary data — never apply line-ending
|
|
29
|
+
# (or any) conversion. These rules come last so they win over the patterns
|
|
30
|
+
# above for any text-looking files (hint .json, version.txt) inside the stores.
|
|
31
|
+
data/*_lancedb/** binary
|
|
32
|
+
*.lance binary
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
name-template: 'v$NEXT_PATCH_VERSION'
|
|
2
|
+
tag-template: 'v$NEXT_PATCH_VERSION'
|
|
3
|
+
categories:
|
|
4
|
+
- title: '🚀 Features'
|
|
5
|
+
labels:
|
|
6
|
+
- 'feature'
|
|
7
|
+
- 'enhancement'
|
|
8
|
+
- title: '🐛 Bug Fixes'
|
|
9
|
+
labels:
|
|
10
|
+
- 'fix'
|
|
11
|
+
- 'bug'
|
|
12
|
+
- title: '📊 Dataset Updates'
|
|
13
|
+
labels:
|
|
14
|
+
- 'dataset'
|
|
15
|
+
- 'data'
|
|
16
|
+
- title: '🧰 Maintenance'
|
|
17
|
+
labels:
|
|
18
|
+
- 'chore'
|
|
19
|
+
- 'refactor'
|
|
20
|
+
- 'dependencies'
|
|
21
|
+
change-template: '- $TITLE @$AUTHOR (#$NUMBER)'
|
|
22
|
+
template: |
|
|
23
|
+
## Changes
|
|
24
|
+
|
|
25
|
+
$CHANGES
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
*Automated release generated by Release Drafter.*
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
name: Build and Publish Docker (GHCR)
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
|
|
8
|
+
env:
|
|
9
|
+
REGISTRY: ghcr.io
|
|
10
|
+
IMAGE_NAME: ${{ github.repository }}
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
build-and-push-image:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
permissions:
|
|
16
|
+
contents: read
|
|
17
|
+
packages: write
|
|
18
|
+
|
|
19
|
+
steps:
|
|
20
|
+
- name: Checkout repository
|
|
21
|
+
uses: actions/checkout@v4
|
|
22
|
+
|
|
23
|
+
- name: Log in to the Container registry
|
|
24
|
+
uses: docker/login-action@v3
|
|
25
|
+
with:
|
|
26
|
+
registry: ${{ env.REGISTRY }}
|
|
27
|
+
username: ${{ github.actor }}
|
|
28
|
+
password: ${{ secrets.GITHUB_TOKEN }}
|
|
29
|
+
|
|
30
|
+
- name: Extract metadata (tags, labels) for Docker
|
|
31
|
+
id: meta
|
|
32
|
+
uses: docker/metadata-action@v5
|
|
33
|
+
with:
|
|
34
|
+
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
|
35
|
+
tags: |
|
|
36
|
+
type=semver,pattern={{version}}
|
|
37
|
+
type=semver,pattern={{major}}.{{minor}}
|
|
38
|
+
type=raw,value=latest,enable={{is_default_branch}}
|
|
39
|
+
|
|
40
|
+
- name: Build and push Docker image
|
|
41
|
+
uses: docker/build-push-action@v5
|
|
42
|
+
with:
|
|
43
|
+
context: .
|
|
44
|
+
push: true
|
|
45
|
+
tags: ${{ steps.meta.outputs.tags }}
|
|
46
|
+
labels: ${{ steps.meta.outputs.labels }}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
name: Publish Databases to GitHub Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
publish-databases:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
permissions:
|
|
12
|
+
contents: write
|
|
13
|
+
|
|
14
|
+
steps:
|
|
15
|
+
- name: Checkout repository
|
|
16
|
+
uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- name: Zip LanceDB folders
|
|
19
|
+
run: |
|
|
20
|
+
cd data
|
|
21
|
+
zip -r ../factorio_lancedb.zip factorio_lancedb/ clusterio_lancedb/ wiki_lancedb/ mod_lancedb/ forum_lancedb/
|
|
22
|
+
|
|
23
|
+
- name: Upload databases zip to Release
|
|
24
|
+
uses: softprops/action-gh-release@v2
|
|
25
|
+
if: github.event_name == 'release'
|
|
26
|
+
with:
|
|
27
|
+
files: factorio_lancedb.zip
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
publish-pypi:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
permissions:
|
|
12
|
+
id-token: write # IMPORTANT: mandatory for trusted publishing
|
|
13
|
+
contents: read
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
with:
|
|
17
|
+
fetch-depth: 0
|
|
18
|
+
- name: Set up Python
|
|
19
|
+
uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: "3.11"
|
|
22
|
+
- name: Install build tool
|
|
23
|
+
run: pip install build
|
|
24
|
+
- name: Build package
|
|
25
|
+
run: python -m build
|
|
26
|
+
- name: Publish to PyPI
|
|
27
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
name: Release Drafter
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
# pull_request event is required only for autolabeler
|
|
8
|
+
pull_request:
|
|
9
|
+
types: [opened, reopened, synchronize]
|
|
10
|
+
|
|
11
|
+
permissions:
|
|
12
|
+
contents: write
|
|
13
|
+
|
|
14
|
+
jobs:
|
|
15
|
+
update_release_draft:
|
|
16
|
+
runs-on: ubuntu-latest
|
|
17
|
+
steps:
|
|
18
|
+
- uses: release-drafter/release-drafter@v6
|
|
19
|
+
with:
|
|
20
|
+
config-name: release-drafter.yml
|
|
21
|
+
env:
|
|
22
|
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
A hybrid-search RAG system and FastMCP server that gives LLMs expert knowledge of Factorio modding and Clusterio plugin development. Five ingestion scripts scrape/parse external sources into local LanceDB vector stores; `server.py` exposes those stores to MCP clients (Claude Desktop, etc.) as search tools.
|
|
8
|
+
|
|
9
|
+
## Commands
|
|
10
|
+
|
|
11
|
+
```powershell
|
|
12
|
+
# One-time setup
|
|
13
|
+
python -m venv venv
|
|
14
|
+
.\venv\Scripts\Activate.ps1
|
|
15
|
+
pip install -r requirements.txt
|
|
16
|
+
|
|
17
|
+
# Build/refresh the vector databases (each is idempotent — safe to re-run).
|
|
18
|
+
# Scripts live in src/factorio_ai_tools/ingest/; every store is written under data/.
|
|
19
|
+
python src/factorio_ai_tools/ingest/ingest_factorio.py # -> data/factorio_lancedb (Lua API + prototype docs, multiple versions)
|
|
20
|
+
python src/factorio_ai_tools/ingest/ingest_clusterio.py # -> data/clusterio_lancedb (set CLUSTERIO_REPO; defaults to ./clusterio)
|
|
21
|
+
python src/factorio_ai_tools/ingest/ingest_wiki.py # -> data/wiki_lancedb (full Factorio wiki via MediaWiki API)
|
|
22
|
+
python src/factorio_ai_tools/ingest/ingest_forum.py # -> data/forum_lancedb (curated topics from forum_links.txt)
|
|
23
|
+
python src/factorio_ai_tools/ingest/ingest_github_repo.py --repo-url https://github.com/notnotmelon/maraxsis # -> data/mod_lancedb
|
|
24
|
+
|
|
25
|
+
# Compact/prune all data/*_lancedb stores (collapses LanceDB version history).
|
|
26
|
+
# Run before merging data changes to main; --check is a read-only guard.
|
|
27
|
+
python maintenance/compact_lancedb.py
|
|
28
|
+
python maintenance/compact_lancedb.py --check
|
|
29
|
+
|
|
30
|
+
# Run the MCP server (stdio transport)
|
|
31
|
+
python server.py
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
There is no test suite, linter, or build step beyond the above. `smithery.yaml` defines the Smithery deployment (build = pip install + the three core ingest scripts; run = `python server.py`).
|
|
35
|
+
|
|
36
|
+
To test a tool manually, import `server.py` in a REPL — the `@mcp.tool()` functions are plain callables. Note that importing `server.py` eagerly loads the SentenceTransformer model and opens all five LanceDB connections.
|
|
37
|
+
|
|
38
|
+
## Architecture
|
|
39
|
+
|
|
40
|
+
**Ingestion → LanceDB → MCP server.** Every database is built offline by an ingest script and queried at runtime by the server. The two halves only share the embedding model and the on-disk store; they never call each other.
|
|
41
|
+
|
|
42
|
+
**Shared embedding contract (must stay consistent across all scripts and the server):**
|
|
43
|
+
- Model: `BAAI/bge-base-en-v1.5`, overridable via `EMBEDDING_MODEL` env var. Device auto-selects CUDA→CPU.
|
|
44
|
+
- Vectors are **768-dim** and **L2-normalized** (`normalize_embeddings=True`). Any new ingest script or schema must match this dimension and normalization, or search breaks silently.
|
|
45
|
+
|
|
46
|
+
**Incremental / idempotent ingestion.** Every script hashes source content with SHA-256 (`get_hash`/`hashlib.sha256`) and stores it as `content_hash`. On re-run it compares hashes per source unit (per URL, per file, per wiki page, per forum topic) and **skips unchanged content; deletes-then-re-adds changed content.** This is why re-running an ingest script is cheap and safe.
|
|
47
|
+
|
|
48
|
+
**Schema migration guard.** Each ingest script checks whether the existing table has the current columns (e.g. `if "content_hash" not in table.schema.names`) and **drops + recreates the whole table** if the schema is stale. Changing a `LanceModel`/pyarrow schema therefore forces a full re-ingest of that store — account for that when editing schemas.
|
|
49
|
+
|
|
50
|
+
**Code-aware chunking via Tree-sitter.** `ingest_clusterio.py` (TypeScript) and `ingest_github_repo.py` (Lua) parse files into AST nodes (classes, functions, methods, interfaces/tables) using a Tree-sitter `Query`, and store each node as a chunk with `node_type`/`node_name`. Non-code files fall back to fixed-size sliding-window text chunking (`extract_text_chunks`, 1500 chars / 200 overlap). The doc/wiki/forum scripts use plain text chunking only.
|
|
51
|
+
|
|
52
|
+
**Per-store table names and key columns** (the server opens these by exact name; all stores live under `data/`):
|
|
53
|
+
- `data/factorio_lancedb` → table `docs`: `text`, `class_name`, `version`, `url`, `node_type`. Holds **multiple Factorio versions** (`["1.1.110", "latest"]`); search filters by `version`. Writes `version.txt`.
|
|
54
|
+
- `data/clusterio_lancedb` → table `codebase`: `content`, `file_path`, `node_type`, `node_name`. Writes `version.txt` from the repo's `package.json`.
|
|
55
|
+
- `data/wiki_lancedb` → table `docs`: `text`, `title`, `url`.
|
|
56
|
+
- `data/forum_lancedb` → table `forum`: `content`, `class_name` (= topic title), `file_path` (= URL), `version`.
|
|
57
|
+
- `data/mod_lancedb` → table `codebase`: `content`, `repo_url`, `file_path`, `node_type`, `node_name`. One store holds **multiple mods**; search filters by `mod_name` via `repo_url LIKE`.
|
|
58
|
+
|
|
59
|
+
**Server resilience.** `server.py` opens each table in its own try/except and sets the handle to `None` on failure, so a missing database degrades only the affected tool (which returns a "run ingest_X.py first" error) rather than crashing the server. Search tools accept a **list of queries** (batched encode) and clamp `limit` to 1–20.
|
|
60
|
+
|
|
61
|
+
**Non-search tools** in `server.py` are self-contained (no DB): `decode_factorio_blueprint`/`encode_factorio_blueprint` (base64+zlib, version byte `0`, 10 MB decompress guard), `factorio_mod_portal_analyzer` (mods.factorio.com API), `factorio_log_inspector` (OS-aware path to `factorio-current.log`), `get_mcp_version_info` (reads the `version.txt` files).
|
|
62
|
+
|
|
63
|
+
## Conventions (from `.agents/AGENTS.md`)
|
|
64
|
+
|
|
65
|
+
- **Windows console printing:** never `print()` raw dynamic/scraped strings — PowerShell's default encoding throws `UnicodeEncodeError` on en-dashes/emojis. Wrap dynamic output: `print(text.encode('ascii', 'replace').decode('ascii'))`. The ingest scripts already do this; preserve it.
|
|
66
|
+
- **Committing:** always run `git status` to verify nothing modified/untracked is left behind before telling the user changes are pushed. Stage deliberately.
|
|
67
|
+
- **SQL-string safety:** LanceDB `.where()` clauses are built with f-strings, so user/dynamic values are escaped by doubling single quotes (`value.replace("'", "''")`). Keep this when adding filters.
|
|
68
|
+
|
|
69
|
+
## Data files & git
|
|
70
|
+
|
|
71
|
+
- The `data/*_lancedb/` stores are committed to the repo and marked `binary` in `.gitattributes` (no line-ending conversion). `.mod_temp/` (clone scratch for `ingest_github_repo.py`, at repo root), `venv/`, `__pycache__/`, `.claude/`, and `.env` are gitignored.
|
|
72
|
+
- `.gitattributes` enforces LF line endings for all text/source files regardless of `core.autocrlf`.
|
|
73
|
+
- `forum_links.txt` is the curated input list for `ingest_forum.py` (one URL per line, `#` comments allowed).
|
|
74
|
+
- **LanceDB hygiene.** Stores are append-only and never self-prune, so each ingest run accumulates versions/fragments. Let that history grow on feature branches (a PR diff then shows what data changed), but run `python maintenance/compact_lancedb.py` before merging to `main` to collapse it (`Table.optimize()` with `cleanup_older_than=0`). `maintenance/hooks/pre-push` is an opt-in guard (`git config core.hooksPath maintenance/hooks`) that blocks pushes to `main` while any store is uncompacted, via `compact_lancedb.py --check`.
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
FROM python:3.11-slim
|
|
2
|
+
|
|
3
|
+
# Install git for any tree-sitter or fetching dependencies
|
|
4
|
+
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
|
|
5
|
+
|
|
6
|
+
WORKDIR /app
|
|
7
|
+
|
|
8
|
+
# Copy requirements first to leverage Docker cache
|
|
9
|
+
COPY requirements.txt .
|
|
10
|
+
RUN pip install --no-cache-dir -r requirements.txt
|
|
11
|
+
|
|
12
|
+
# Copy all python scripts and LanceDB vector databases
|
|
13
|
+
COPY . .
|
|
14
|
+
|
|
15
|
+
# Ensure the mcp server binds to stdio properly and PYTHONPATH is set for the src layout
|
|
16
|
+
ENV PYTHONUNBUFFERED=1
|
|
17
|
+
ENV PYTHONPATH=/app/src
|
|
18
|
+
|
|
19
|
+
ENTRYPOINT ["python", "-m", "factorio_ai_tools.server"]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 solarcloud7
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
.PHONY: help compact ingest-all mcp
|
|
2
|
+
|
|
3
|
+
help:
|
|
4
|
+
@echo "Available commands:"
|
|
5
|
+
@echo " make compact - Compact and condense the LanceDB database"
|
|
6
|
+
@echo " make ingest-all - Run the ingestion script for all configured repositories"
|
|
7
|
+
@echo " make mcp - Start the MCP server"
|
|
8
|
+
|
|
9
|
+
compact:
|
|
10
|
+
uv run --no-sync python src/factorio_ai_tools/ingest/compact_db.py
|
|
11
|
+
|
|
12
|
+
ingest-all:
|
|
13
|
+
uv run --no-sync python src/factorio_ai_tools/ingest/ingest_github_repo.py --repo-url https://github.com/clusterio/clusterio-docker.git
|
|
14
|
+
uv run --no-sync python src/factorio_ai_tools/ingest/ingest_github_repo.py --repo-url https://github.com/wube/factorio-data.git
|
|
15
|
+
uv run --no-sync python src/factorio_ai_tools/ingest/ingest_github_repo.py --repo-url https://github.com/redruin1/factorio-draftsman.git
|
|
16
|
+
uv run --no-sync python src/factorio_ai_tools/ingest/ingest_github_repo.py --repo-url https://github.com/Teoxoy/factorio-blueprint-editor.git
|
|
17
|
+
|
|
18
|
+
mcp:
|
|
19
|
+
.\start_mcp_server.bat
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: factorio-ai-tools
|
|
3
|
+
Version: 1.1.1
|
|
4
|
+
Summary: A lightning-fast, hybrid-search Vector Database and Model Context Protocol (MCP) server for Factorio modding.
|
|
5
|
+
Author: solarcloud7
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: mcp[cli]>=1.1.2
|
|
10
|
+
Requires-Dist: lancedb>=0.17.0
|
|
11
|
+
Requires-Dist: beautifulsoup4>=4.12.0
|
|
12
|
+
Requires-Dist: pyarrow>=15.0.0
|
|
13
|
+
Requires-Dist: requests>=2.31.0
|
|
14
|
+
Requires-Dist: hf-transfer>=0.1.0
|
|
15
|
+
Requires-Dist: tree-sitter-lua>=0.2.0
|
|
16
|
+
Requires-Dist: torch>=2.12.1
|
|
17
|
+
Requires-Dist: sentence-transformers>=5.6.0
|
|
18
|
+
Dynamic: license-file
|
|
19
|
+
|
|
20
|
+
<div align="center">
|
|
21
|
+
<img src="docs/assets/factorio-ai-tools.png" alt="Factorio AI Tools Icon" width="200"/>
|
|
22
|
+
<br/>
|
|
23
|
+
<a href="https://pypi.org/project/factorio-ai-tools/"><img src="https://img.shields.io/pypi/v/factorio-ai-tools" alt="PyPI - Version"/></a>
|
|
24
|
+
<a href="https://github.com/solarcloud7/factorio-ai-tools/releases"><img src="https://img.shields.io/github/v/release/solarcloud7/factorio-ai-tools" alt="GitHub Release"/></a>
|
|
25
|
+
</div>
|
|
26
|
+
|
|
27
|
+
# Factorio AI Tools (MCP Server)
|
|
28
|
+
|
|
29
|
+
A lightning-fast, hybrid-search Vector Database and Model Context Protocol (MCP) server designed to give LLMs absolute expertise over Factorio modding and Clusterio plugin development.
|
|
30
|
+
|
|
31
|
+
## Architecture
|
|
32
|
+
|
|
33
|
+
This project consists of 5 main components:
|
|
34
|
+
1. **Factorio Docs Ingestion (`ingest_factorio.py`)**: Scrapes the official Lua API documentation and Data Phase Prototypes across multiple versions (e.g. `1.1.110` and `latest`).
|
|
35
|
+
2. **Clusterio Codebase Ingestion (`ingest_clusterio.py`)**: Uses AST (Abstract Syntax Tree) parsing to semantically chunk the massive Node.js/TypeScript Clusterio plugin architecture.
|
|
36
|
+
3. **Factorio Wiki Ingestion (`ingest_wiki.py`)**: Scrapes the official Factorio Wiki via the MediaWiki API, exclusively extracting English wikitext for gameplay mechanics, ratios, and formulas.
|
|
37
|
+
4. **GitHub Mod Ingestion (`ingest_github_repo.py`)**: A generalized pipeline that clones, AST-parses (via `tree-sitter-lua`), and incrementally hashes any GitHub Mod codebase (e.g., Maraxsis) into a semantic `mod_lancedb` index.
|
|
38
|
+
5. **FastMCP Server (`server.py`)**: The bridge that connects the underlying LanceDB vector databases to an LLM via the standard Model Context Protocol.
|
|
39
|
+
|
|
40
|
+
## Setup & Usage
|
|
41
|
+
|
|
42
|
+
There are three primary ways to install and use this MCP server locally with Claude Desktop (or any other MCP client):
|
|
43
|
+
|
|
44
|
+
### Method 1: Using `uvx` (Recommended)
|
|
45
|
+
If you have `uv` installed, this is the cleanest way to run the server. It will automatically download the package from PyPI and fetch the necessary vector databases on the first run.
|
|
46
|
+
Add the following to your Claude Desktop config (`%APPDATA%\Claude\claude_desktop_config.json` or `~/Library/Application Support/Claude/claude_desktop_config.json`):
|
|
47
|
+
```json
|
|
48
|
+
{
|
|
49
|
+
"mcpServers": {
|
|
50
|
+
"factorio-ai-tools": {
|
|
51
|
+
"command": "uvx",
|
|
52
|
+
"args": ["factorio-ai-tools"]
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### Method 2: Docker (Pre-packaged Datasets)
|
|
59
|
+
If you have Docker Desktop installed, you can simply pull the pre-packaged container natively. The Docker container includes the databases inside the image, so no additional downloads are required at runtime.
|
|
60
|
+
```json
|
|
61
|
+
{
|
|
62
|
+
"mcpServers": {
|
|
63
|
+
"factorio-ai-tools": {
|
|
64
|
+
"command": "docker",
|
|
65
|
+
"args": ["run", "-i", "--rm", "ghcr.io/solarcloud7/factorio-ai-tools:latest"]
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Method 3: Global SSE Server (Save RAM/VRAM)
|
|
73
|
+
By default, standard `stdio` MCP execution spawns a completely separate Python process for *every single client connection*. Because this server uses PyTorch and `sentence-transformers`, every connection will load the embedding model again, consuming roughly 500 MB of RAM/VRAM per instance.
|
|
74
|
+
|
|
75
|
+
If you want to use the MCP server across multiple IDEs or workspaces simultaneously without duplicating memory, you can run a single global HTTP SSE server in the background:
|
|
76
|
+
|
|
77
|
+
```powershell
|
|
78
|
+
uv run factorio-ai-tools --sse --port 8000
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Then, configure your IDE or Claude client to connect to the SSE endpoint (e.g., `http://localhost:8000/sse`) instead of executing the CLI via `stdio`.
|
|
82
|
+
|
|
83
|
+
### Selective Tool Loading (Optional)
|
|
84
|
+
By default, the server loads all available tools. If you only want to expose specific tools to your LLM, you can use the `--enable-tools` or `--disable-tools` arguments.
|
|
85
|
+
For example, to *only* load the doc search and the blueprint decoder using `uvx`:
|
|
86
|
+
```json
|
|
87
|
+
"command": "uvx",
|
|
88
|
+
"args": [
|
|
89
|
+
"factorio-ai-tools",
|
|
90
|
+
"--enable-tools", "search_factorio_docs,decode_factorio_blueprint"
|
|
91
|
+
]
|
|
92
|
+
```
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
### Manual Developer Setup
|
|
96
|
+
If you wish to run the python scripts manually or ingest custom codebases:
|
|
97
|
+
1. Create a python virtual environment: `python -m venv venv` and activate it.
|
|
98
|
+
2. Run `pip install -r requirements.txt`.
|
|
99
|
+
3. *(Optional)* Run the ingestion scripts (`python -m factorio_ai_tools.ingest.ingest_factorio`, etc.) to rebuild the LanceDB tables.
|
|
100
|
+
4. *(Optional)* Ingest a specific GitHub Mod:
|
|
101
|
+
```powershell
|
|
102
|
+
python -m factorio_ai_tools.ingest.ingest_github_repo --repo-url https://github.com/notnotmelon/maraxsis
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Maintenance (Database Hygiene)
|
|
106
|
+
|
|
107
|
+
LanceDB is append-only: every ingest run adds new immutable versions and small data fragments, and **nothing is garbage-collected automatically**. Re-running an ingest script grows the on-disk history (e.g. `factorio_lancedb` had 155 versions / 469 files before its first compaction). To keep the committed stores lean:
|
|
108
|
+
|
|
109
|
+
```powershell
|
|
110
|
+
python maintenance/compact_lancedb.py # compact + prune every data/*_lancedb store
|
|
111
|
+
python maintenance/compact_lancedb.py --check # read-only; exits non-zero if a store is uncompacted
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
This runs LanceDB's `Table.optimize()` on each store — compacting fragments, pruning old versions, and folding new rows into existing indices. Do **not** run it while the server or an ingest script is writing.
|
|
115
|
+
|
|
116
|
+
**Recommended workflow:** let the version history accumulate on feature branches so a PR diff shows exactly what data changed, then run the compaction script before merging to `main` so the committed history stays collapsed.
|
|
117
|
+
|
|
118
|
+
To enforce that automatically, opt into the bundled pre-push guard (it blocks pushes to `main` while any store is uncompacted):
|
|
119
|
+
|
|
120
|
+
```powershell
|
|
121
|
+
git config core.hooksPath maintenance/hooks
|
|
122
|
+
# or copy maintenance/hooks/pre-push into .git/hooks/
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
## Tools Included
|
|
128
|
+
|
|
129
|
+
- `search_factorio_docs`: Look up Lua Runtime API methods, concepts, events, and Data Phase prototypes. Supports version filtering (`1.1.110` vs `latest`).
|
|
130
|
+
- `search_clusterio_code`: Semantically search the Clusterio Node.js architecture.
|
|
131
|
+
- `search_factorio_wiki`: Access game mechanics, ratios, and fluid mechanics straight from the Wiki.
|
|
132
|
+
- `search_mod_code`: Semantically search through specific downloaded GitHub mods (e.g., `maraxsis`) to read their Lua codebase.
|
|
133
|
+
- `decode_factorio_blueprint`: Convert Factorio blueprint strings (e.g. `0eNq...`) into easily readable/editable JSON.
|
|
134
|
+
- `encode_factorio_blueprint`: Compress generated JSON back into an importable Factorio blueprint string.
|
|
135
|
+
- `factorio_mod_portal_analyzer`: Scrape and summarize the Factorio Mod Portal for any given mod to retrieve dependencies and release versions.
|
|
136
|
+
|
|
137
|
+
- `get_mcp_version_info`: Self-diagnostics tool to verify the currently loaded database versions.
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
<img src="docs/assets/factorio-ai-tools.png" alt="Factorio AI Tools Icon" width="200"/>
|
|
3
|
+
<br/>
|
|
4
|
+
<a href="https://pypi.org/project/factorio-ai-tools/"><img src="https://img.shields.io/pypi/v/factorio-ai-tools" alt="PyPI - Version"/></a>
|
|
5
|
+
<a href="https://github.com/solarcloud7/factorio-ai-tools/releases"><img src="https://img.shields.io/github/v/release/solarcloud7/factorio-ai-tools" alt="GitHub Release"/></a>
|
|
6
|
+
</div>
|
|
7
|
+
|
|
8
|
+
# Factorio AI Tools (MCP Server)
|
|
9
|
+
|
|
10
|
+
A lightning-fast, hybrid-search Vector Database and Model Context Protocol (MCP) server designed to give LLMs absolute expertise over Factorio modding and Clusterio plugin development.
|
|
11
|
+
|
|
12
|
+
## Architecture
|
|
13
|
+
|
|
14
|
+
This project consists of 5 main components:
|
|
15
|
+
1. **Factorio Docs Ingestion (`ingest_factorio.py`)**: Scrapes the official Lua API documentation and Data Phase Prototypes across multiple versions (e.g. `1.1.110` and `latest`).
|
|
16
|
+
2. **Clusterio Codebase Ingestion (`ingest_clusterio.py`)**: Uses AST (Abstract Syntax Tree) parsing to semantically chunk the massive Node.js/TypeScript Clusterio plugin architecture.
|
|
17
|
+
3. **Factorio Wiki Ingestion (`ingest_wiki.py`)**: Scrapes the official Factorio Wiki via the MediaWiki API, exclusively extracting English wikitext for gameplay mechanics, ratios, and formulas.
|
|
18
|
+
4. **GitHub Mod Ingestion (`ingest_github_mod.py`)**: A generalized pipeline that clones, AST-parses (via `tree-sitter-lua`), and incrementally hashes any GitHub Mod codebase (e.g., Maraxsis) into a semantic `mod_lancedb` index.
|
|
19
|
+
5. **FastMCP Server (`server.py`)**: The bridge that connects the underlying LanceDB vector databases to an LLM via the standard Model Context Protocol.
|
|
20
|
+
|
|
21
|
+
## Setup & Usage
|
|
22
|
+
|
|
23
|
+
There are three primary ways to install and use this MCP server locally with Claude Desktop (or any other MCP client):
|
|
24
|
+
|
|
25
|
+
### Method 1: Using `uvx` (Recommended)
|
|
26
|
+
If you have `uv` installed, this is the cleanest way to run the server. It will automatically download the package from PyPI and fetch the necessary vector databases on the first run.
|
|
27
|
+
Add the following to your Claude Desktop config (`%APPDATA%\Claude\claude_desktop_config.json` or `~/Library/Application Support/Claude/claude_desktop_config.json`):
|
|
28
|
+
```json
|
|
29
|
+
{
|
|
30
|
+
"mcpServers": {
|
|
31
|
+
"factorio-ai-tools": {
|
|
32
|
+
"command": "uvx",
|
|
33
|
+
"args": ["factorio-ai-tools"]
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Method 2: Docker (Pre-packaged Datasets)
|
|
40
|
+
If you have Docker Desktop installed, you can simply run the pre-packaged container image. The image bundles the databases, so no additional downloads are required at runtime.
|
|
41
|
+
```json
|
|
42
|
+
{
|
|
43
|
+
"mcpServers": {
|
|
44
|
+
"factorio-ai-tools": {
|
|
45
|
+
"command": "docker",
|
|
46
|
+
"args": ["run", "-i", "--rm", "ghcr.io/solarcloud7/factorio-ai-tools:latest"]
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
### Method 3: Global SSE Server (Save RAM/VRAM)
|
|
54
|
+
By default, standard `stdio` MCP execution spawns a completely separate Python process for *every single client connection*. Because this server uses PyTorch and `sentence-transformers`, every connection will load the embedding model again, consuming roughly 500MB of RAM/VRAM per instance.
|
|
55
|
+
|
|
56
|
+
If you want to use the MCP server across multiple IDEs or workspaces simultaneously without duplicating memory, you can run a single global HTTP SSE server in the background:
|
|
57
|
+
|
|
58
|
+
```powershell
|
|
59
|
+
uv run factorio-ai-tools --sse --port 8000
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Then, configure your IDE or Claude client to connect to the SSE endpoint (e.g., `http://localhost:8000/sse`) instead of executing the CLI via `stdio`.
|
|
63
|
+
|
|
64
|
+
### Selective Tool Loading (Optional)
|
|
65
|
+
By default, the server loads all available tools. If you only want to expose specific tools to your LLM, you can use the `--enable-tools` or `--disable-tools` arguments.
|
|
66
|
+
For example, to *only* load the doc search and the blueprint decoder using `uvx`:
|
|
67
|
+
```json
|
|
68
|
+
"command": "uvx",
|
|
69
|
+
"args": [
|
|
70
|
+
"factorio-ai-tools",
|
|
71
|
+
"--enable-tools", "search_factorio_docs,decode_factorio_blueprint"
|
|
72
|
+
]
|
|
73
|
+
```
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
### Manual Developer Setup
|
|
77
|
+
If you wish to run the python scripts manually or ingest custom codebases:
|
|
78
|
+
1. Create a python virtual environment: `python -m venv venv` and activate it.
|
|
79
|
+
2. Run `pip install -r requirements.txt`.
|
|
80
|
+
3. *(Optional)* Run the ingestion scripts (`python -m factorio_ai_tools.ingest.ingest_factorio`, etc.) to rebuild the LanceDB tables.
|
|
81
|
+
4. *(Optional)* Ingest a specific GitHub Mod:
|
|
82
|
+
```powershell
|
|
83
|
+
python -m factorio_ai_tools.ingest.ingest_github_mod --repo-url https://github.com/notnotmelon/maraxsis
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Maintenance (Database Hygiene)
|
|
87
|
+
|
|
88
|
+
LanceDB is append-only: every ingest run adds new immutable versions and small data fragments, and **nothing is garbage-collected automatically**. Re-running an ingest script grows the on-disk history (e.g. `factorio_lancedb` had 155 versions / 469 files before its first compaction). To keep the committed stores lean:
|
|
89
|
+
|
|
90
|
+
```powershell
|
|
91
|
+
python maintenance/compact_lancedb.py # compact + prune every data/*_lancedb store
|
|
92
|
+
python maintenance/compact_lancedb.py --check # read-only; exits non-zero if a store is uncompacted
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
This runs LanceDB's `Table.optimize()` on each store — compacting fragments, pruning old versions, and folding new rows into existing indices. Do **not** run it while the server or an ingest script is writing.
|
|
96
|
+
|
|
97
|
+
**Recommended workflow:** let the version history accumulate on feature branches so a PR diff shows exactly what data changed, then run the compaction script before merging to `main` so the committed history stays collapsed.
|
|
98
|
+
|
|
99
|
+
To enforce that automatically, opt into the bundled pre-push guard (it blocks pushes to `main` while any store is uncompacted):
|
|
100
|
+
|
|
101
|
+
```powershell
|
|
102
|
+
git config core.hooksPath maintenance/hooks
|
|
103
|
+
# or copy maintenance/hooks/pre-push into .git/hooks/
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
## Tools Included
|
|
109
|
+
|
|
110
|
+
- `search_factorio_docs`: Look up Lua Runtime API methods, concepts, events, and Data Phase prototypes. Supports version filtering (`1.1.110` vs `latest`).
|
|
111
|
+
- `search_clusterio_code`: Semantically search the Clusterio Node.js architecture.
|
|
112
|
+
- `search_factorio_wiki`: Access game mechanics, ratios, and fluid mechanics straight from the Wiki.
|
|
113
|
+
- `search_mod_code`: Semantically search through specific downloaded GitHub mods (e.g., `maraxsis`) to read their Lua codebase.
|
|
114
|
+
- `decode_factorio_blueprint`: Convert Factorio blueprint strings (e.g. `0eNq...`) into easily readable/editable JSON.
|
|
115
|
+
- `encode_factorio_blueprint`: Compress generated JSON back into an importable Factorio blueprint string.
|
|
116
|
+
- `factorio_mod_portal_analyzer`: Scrape and summarize the Factorio Mod Portal for any given mod to retrieve dependencies and release versions.
|
|
117
|
+
|
|
118
|
+
- `get_mcp_version_info`: Self-diagnostics tool to verify the currently loaded database versions.
|
|
Binary file
|