entropy-predict 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. entropy_predict-0.1.0/.env.example +9 -0
  2. entropy_predict-0.1.0/.github/workflows/publish.yml +26 -0
  3. entropy_predict-0.1.0/.gitignore +55 -0
  4. entropy_predict-0.1.0/CLAUDE.md +176 -0
  5. entropy_predict-0.1.0/LICENSE +21 -0
  6. entropy_predict-0.1.0/PKG-INFO +191 -0
  7. entropy_predict-0.1.0/README.md +152 -0
  8. entropy_predict-0.1.0/docs/architecture.md +187 -0
  9. entropy_predict-0.1.0/docs/commands.md +557 -0
  10. entropy_predict-0.1.0/entropy/__init__.py +3 -0
  11. entropy_predict-0.1.0/entropy/cli/__init__.py +23 -0
  12. entropy_predict-0.1.0/entropy/cli/app.py +55 -0
  13. entropy_predict-0.1.0/entropy/cli/commands/__init__.py +27 -0
  14. entropy_predict-0.1.0/entropy/cli/commands/config_cmd.py +167 -0
  15. entropy_predict-0.1.0/entropy/cli/commands/extend.py +271 -0
  16. entropy_predict-0.1.0/entropy/cli/commands/network.py +187 -0
  17. entropy_predict-0.1.0/entropy/cli/commands/persona.py +344 -0
  18. entropy_predict-0.1.0/entropy/cli/commands/results.py +60 -0
  19. entropy_predict-0.1.0/entropy/cli/commands/sample.py +324 -0
  20. entropy_predict-0.1.0/entropy/cli/commands/scenario.py +250 -0
  21. entropy_predict-0.1.0/entropy/cli/commands/simulate.py +258 -0
  22. entropy_predict-0.1.0/entropy/cli/commands/spec.py +253 -0
  23. entropy_predict-0.1.0/entropy/cli/commands/validate.py +281 -0
  24. entropy_predict-0.1.0/entropy/cli/display.py +233 -0
  25. entropy_predict-0.1.0/entropy/cli/utils.py +317 -0
  26. entropy_predict-0.1.0/entropy/config.py +260 -0
  27. entropy_predict-0.1.0/entropy/core/__init__.py +16 -0
  28. entropy_predict-0.1.0/entropy/core/llm.py +176 -0
  29. entropy_predict-0.1.0/entropy/core/models/__init__.py +198 -0
  30. entropy_predict-0.1.0/entropy/core/models/network.py +188 -0
  31. entropy_predict-0.1.0/entropy/core/models/population.py +532 -0
  32. entropy_predict-0.1.0/entropy/core/models/results.py +125 -0
  33. entropy_predict-0.1.0/entropy/core/models/sampling.py +35 -0
  34. entropy_predict-0.1.0/entropy/core/models/scenario.py +318 -0
  35. entropy_predict-0.1.0/entropy/core/models/simulation.py +329 -0
  36. entropy_predict-0.1.0/entropy/core/models/validation.py +219 -0
  37. entropy_predict-0.1.0/entropy/core/providers/__init__.py +46 -0
  38. entropy_predict-0.1.0/entropy/core/providers/base.py +102 -0
  39. entropy_predict-0.1.0/entropy/core/providers/claude.py +326 -0
  40. entropy_predict-0.1.0/entropy/core/providers/logging.py +69 -0
  41. entropy_predict-0.1.0/entropy/core/providers/openai.py +337 -0
  42. entropy_predict-0.1.0/entropy/core/rate_limiter.py +252 -0
  43. entropy_predict-0.1.0/entropy/core/rate_limits.py +87 -0
  44. entropy_predict-0.1.0/entropy/population/__init__.py +79 -0
  45. entropy_predict-0.1.0/entropy/population/network/__init__.py +96 -0
  46. entropy_predict-0.1.0/entropy/population/network/config.py +150 -0
  47. entropy_predict-0.1.0/entropy/population/network/generator.py +434 -0
  48. entropy_predict-0.1.0/entropy/population/network/metrics.py +236 -0
  49. entropy_predict-0.1.0/entropy/population/network/similarity.py +229 -0
  50. entropy_predict-0.1.0/entropy/population/persona/__init__.py +48 -0
  51. entropy_predict-0.1.0/entropy/population/persona/config.py +222 -0
  52. entropy_predict-0.1.0/entropy/population/persona/generator.py +685 -0
  53. entropy_predict-0.1.0/entropy/population/persona/renderer.py +380 -0
  54. entropy_predict-0.1.0/entropy/population/persona/stats.py +77 -0
  55. entropy_predict-0.1.0/entropy/population/sampler/__init__.py +57 -0
  56. entropy_predict-0.1.0/entropy/population/sampler/core.py +363 -0
  57. entropy_predict-0.1.0/entropy/population/sampler/distributions.py +339 -0
  58. entropy_predict-0.1.0/entropy/population/sampler/modifiers.py +246 -0
  59. entropy_predict-0.1.0/entropy/population/spec_builder/__init__.py +64 -0
  60. entropy_predict-0.1.0/entropy/population/spec_builder/binder.py +167 -0
  61. entropy_predict-0.1.0/entropy/population/spec_builder/hydrator.py +231 -0
  62. entropy_predict-0.1.0/entropy/population/spec_builder/hydrators/__init__.py +18 -0
  63. entropy_predict-0.1.0/entropy/population/spec_builder/hydrators/conditional.py +448 -0
  64. entropy_predict-0.1.0/entropy/population/spec_builder/hydrators/derived.py +161 -0
  65. entropy_predict-0.1.0/entropy/population/spec_builder/hydrators/independent.py +196 -0
  66. entropy_predict-0.1.0/entropy/population/spec_builder/hydrators/prompts.py +250 -0
  67. entropy_predict-0.1.0/entropy/population/spec_builder/parsers.py +194 -0
  68. entropy_predict-0.1.0/entropy/population/spec_builder/schemas.py +293 -0
  69. entropy_predict-0.1.0/entropy/population/spec_builder/selector.py +320 -0
  70. entropy_predict-0.1.0/entropy/population/spec_builder/sufficiency.py +92 -0
  71. entropy_predict-0.1.0/entropy/population/validator/__init__.py +55 -0
  72. entropy_predict-0.1.0/entropy/population/validator/llm_response.py +646 -0
  73. entropy_predict-0.1.0/entropy/population/validator/semantic.py +250 -0
  74. entropy_predict-0.1.0/entropy/population/validator/spec.py +42 -0
  75. entropy_predict-0.1.0/entropy/population/validator/structural.py +762 -0
  76. entropy_predict-0.1.0/entropy/scenario/__init__.py +100 -0
  77. entropy_predict-0.1.0/entropy/scenario/compiler.py +303 -0
  78. entropy_predict-0.1.0/entropy/scenario/exposure.py +285 -0
  79. entropy_predict-0.1.0/entropy/scenario/interaction.py +293 -0
  80. entropy_predict-0.1.0/entropy/scenario/outcomes.py +139 -0
  81. entropy_predict-0.1.0/entropy/scenario/parser.py +210 -0
  82. entropy_predict-0.1.0/entropy/scenario/validator.py +550 -0
  83. entropy_predict-0.1.0/entropy/simulation/__init__.py +144 -0
  84. entropy_predict-0.1.0/entropy/simulation/aggregation.py +298 -0
  85. entropy_predict-0.1.0/entropy/simulation/engine.py +766 -0
  86. entropy_predict-0.1.0/entropy/simulation/persona.py +381 -0
  87. entropy_predict-0.1.0/entropy/simulation/propagation.py +276 -0
  88. entropy_predict-0.1.0/entropy/simulation/reasoning.py +829 -0
  89. entropy_predict-0.1.0/entropy/simulation/state.py +839 -0
  90. entropy_predict-0.1.0/entropy/simulation/stopping.py +291 -0
  91. entropy_predict-0.1.0/entropy/simulation/timeline.py +259 -0
  92. entropy_predict-0.1.0/entropy/utils/__init__.py +65 -0
  93. entropy_predict-0.1.0/entropy/utils/distributions.py +227 -0
  94. entropy_predict-0.1.0/entropy/utils/eval_safe.py +122 -0
  95. entropy_predict-0.1.0/entropy/utils/expressions.py +220 -0
  96. entropy_predict-0.1.0/entropy/utils/graphs.py +117 -0
  97. entropy_predict-0.1.0/pyproject.toml +64 -0
  98. entropy_predict-0.1.0/scripts/test_provider.py +83 -0
  99. entropy_predict-0.1.0/stash/CAPABILITIES.md +158 -0
  100. entropy_predict-0.1.0/stash/SIMULATION_IMPROVEMENTS.md +242 -0
  101. entropy_predict-0.1.0/stash/commands.md +616 -0
  102. entropy_predict-0.1.0/stash/persona-implementation.md +519 -0
  103. entropy_predict-0.1.0/stash/phase1-currentflow.md +328 -0
  104. entropy_predict-0.1.0/stash/simulation-redesign.md +895 -0
  105. entropy_predict-0.1.0/stash/simulation.md +315 -0
  106. entropy_predict-0.1.0/tests/__init__.py +1 -0
  107. entropy_predict-0.1.0/tests/conftest.py +412 -0
  108. entropy_predict-0.1.0/tests/test_models.py +509 -0
  109. entropy_predict-0.1.0/tests/test_network.py +665 -0
  110. entropy_predict-0.1.0/tests/test_sampler.py +1237 -0
  111. entropy_predict-0.1.0/tests/test_scenario.py +674 -0
  112. entropy_predict-0.1.0/tests/test_simulation.py +548 -0
  113. entropy_predict-0.1.0/tests/test_validator.py +740 -0
  114. entropy_predict-0.1.0/uv.lock +874 -0
@@ -0,0 +1,9 @@
1
+ # Entropy API Keys
2
+ # These are the only settings that belong in .env (secrets only).
3
+ # For provider/model config, use: entropy config set <key> <value>
4
+
5
+ # OpenAI (required if using openai as provider)
6
+ OPENAI_API_KEY=sk-...
7
+
8
+ # Anthropic (from https://console.anthropic.com/settings/keys)
9
+ ANTHROPIC_API_KEY=sk-ant-...
@@ -0,0 +1,26 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ publish:
9
+ runs-on: ubuntu-latest
10
+ environment: pypi
11
+ permissions:
12
+ id-token: write
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+
16
+ - uses: actions/setup-python@v5
17
+ with:
18
+ python-version: "3.12"
19
+
20
+ - name: Build package
21
+ run: |
22
+ pip install build
23
+ python -m build
24
+
25
+ - name: Publish to PyPI
26
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,55 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Virtual environments
24
+ env/
25
+ venv/
26
+ .venv/
27
+
28
+ # IDE
29
+ .idea/
30
+ .vscode/
31
+ *.swp
32
+ *.swo
33
+
34
+ # Environment
35
+ .env
36
+
37
+ # Storage (database and populations)
38
+ storage/
39
+
40
+ # Simulation results
41
+ results/
42
+
43
+ # Cache
44
+ data/cache/
45
+
46
+ # Logs
47
+ logs/
48
+
49
+ # OS
50
+ .DS_Store
51
+ Thumbs.db
52
+
53
+
54
+ austin/
55
+ examples/
@@ -0,0 +1,176 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## What Entropy Is
6
+
7
+ Entropy is a predictive intelligence framework that simulates how real human populations respond to scenarios. It creates synthetic populations grounded in real-world statistical data, enriches them with LLM-extrapolated psychographic attributes, connects them via social networks, and runs agent-based simulations where each agent reasons individually via LLM calls. The output is not a poll — it's a simulation of emergent collective behavior.
8
+
9
+ Competitor reference: [Aaru](https://aaru.com) operates in the same space (multi-agent population simulation for predictive intelligence). Entropy differentiates through its grounding pipeline — every attribute distribution is researched from real-world sources with citations, not just LLM-generated.
10
+
11
+ ## Commands
12
+
13
+ ```bash
14
+ pip install -e ".[dev]" # Install with dev deps
15
+
16
+ # Set API keys (secrets only — in .env or env vars)
17
+ export ANTHROPIC_API_KEY=sk-ant-...
18
+ export OPENAI_API_KEY=sk-...
19
+
20
+ # Configure providers and models
21
+ entropy config set pipeline.provider claude # Use Claude for population/scenario building
22
+ entropy config set simulation.provider openai # Use OpenAI for agent reasoning
23
+ entropy config set simulation.model gpt-5-mini # Override simulation model
24
+ entropy config set simulation.routine_model gpt-5-mini # Cheap model for Pass 2 classification
25
+ entropy config set simulation.rate_tier 2 # Rate limit tier (1-4)
26
+ entropy config show # View current config
27
+
28
+ pytest # Run all tests
29
+ pytest tests/test_sampler.py # Single test file
30
+ pytest -k "test_name" # Single test by name
31
+
32
+ ruff check . # Lint
33
+ ruff format . # Format
34
+ ```
35
+
36
+ CLI entry point: `entropy` (defined in `pyproject.toml` → `entropy.cli:app`). Python >=3.11.
37
+
38
+ ## Pipeline (7 sequential commands)
39
+
40
+ ```
41
+ entropy spec → entropy extend → entropy sample → entropy network → entropy persona → entropy scenario → entropy simulate
42
+
43
+ entropy results
44
+ ```
45
+
46
+ Each command produces a file consumed by the next. `entropy validate` is a utility runnable at any point. `entropy results` is a viewer for simulation output.
47
+
48
+ ## Architecture
49
+
50
+ Three phases, each mapping to a package under `entropy/`:
51
+
52
+ ### Phase 1: Population Creation (`entropy/population/`)
53
+
54
+ **The validity pipeline.** This is where predictive accuracy is won or lost.
55
+
56
+ 1. **Sufficiency check** (`spec_builder/sufficiency.py`) — LLM validates the description has enough context (who, how many, where).
57
+
58
+ 2. **Attribute selection** (`spec_builder/selector.py`) — LLM discovers 25-40 attributes across 4 categories: `universal` (age, gender), `population_specific` (specialty, seniority), `context_specific` (scenario-relevant), `personality` (Big Five). Each attribute gets a type (`int`/`float`/`categorical`/`boolean`) and sampling strategy (`independent`/`derived`/`conditional`).
59
+
60
+ 3. **Hydration** (`spec_builder/hydrator.py` → `hydrators/`) — The most important step. Four sub-steps, each using different LLM tiers:
61
+ - **2a: Independent** (`hydrators/independent.py`) — `agentic_research()` with web search finds real-world distributions with source URLs. This is the grounding layer.
62
+ - **2b: Derived** (`hydrators/derived.py`) — `reasoning_call()` specifies deterministic formulas (e.g., `years_experience = age - 26`).
63
+ - **2c: Conditional base** (`hydrators/conditional.py`) — `agentic_research()` finds base distributions for attributes that depend on others.
64
+ - **2d: Conditional modifiers** (`hydrators/conditional.py`) — `reasoning_call()` specifies how attribute values shift based on other attributes. Type-specific: numeric gets `multiply`/`add`, categorical gets `weight_overrides`, boolean gets `probability_override`.
65
+
66
+ 4. **Constraint binding** (`spec_builder/binder.py`) — Topological sort (Kahn's algorithm, `utils/graphs.py`) resolves attribute dependencies into a valid sampling order. Raises `CircularDependencyError` with cycle path.
67
+
68
+ 5. **Sampling** (`sampler/core.py`) — Iterates through `sampling_order`, routing each attribute by strategy. Supports 6 distribution types: normal, lognormal, uniform, beta, categorical, boolean. Hard constraints (min/max) are clamped post-sampling. Formula parameters evaluated via `utils/eval_safe.py` (restricted Python eval, whitelisted builtins only).
69
+
70
+ 6. **Network generation** (`network/generator.py`) — Hybrid algorithm: similarity-based edge probability with degree correction, calibrated via binary search to hit target avg_degree, then Watts-Strogatz rewiring (5%) for small-world properties. Edge probability: `base_rate * sigmoid(similarity) * degree_factor_a * degree_factor_b`.
71
+
72
+ ### Phase 2: Scenario Compilation (`entropy/scenario/`)
73
+
74
+ **Compiler** (`compiler.py`) orchestrates 5 steps: parse event → generate exposure rules → determine interaction model → define outcomes → assemble spec.
75
+
76
+ - **Event types**: product_launch, policy_change, pricing_change, technology_release, organizational_change, market_event, crisis_event
77
+ - **Exposure channels**: broadcast, targeted, organic — with per-timestep rules containing conditions and probabilities
78
+ - **Outcomes**: categorical (enum options), boolean, float (with range), open_ended
79
+ - Auto-configures simulation parameters based on population size (<500: 50 timesteps, ≤5000: 100, >5000: 168)
80
+
81
+ ### Phase 3: Simulation (`entropy/simulation/`)
82
+
83
+ **Engine** (`engine.py`) runs per-timestep loop:
84
+ 1. Apply seed exposures from scenario rules (`propagation.py`)
85
+ 2. Propagate through network — conviction-gated sharing (very_uncertain agents don't share)
86
+ 3. Select agents to reason — first exposure OR multi-touch threshold exceeded (default: 3 new exposures since last reasoning)
87
+ 4. **Two-pass async LLM reasoning** (`reasoning.py`) — Rate-limiter-controlled (token bucket per provider/model):
88
+ - **Pass 1 (role-play)**: Agent reasons in first person with no categorical enums. Produces reasoning, public_statement, sentiment, conviction level, will_share. Memory trace (last 3 reasoning summaries) is fed back for re-reasoning agents.
89
+ - **Pass 2 (classification)**: A cheap model classifies the free-text reasoning into scenario-defined categorical/boolean/float outcomes. Position is extracted here — it is output-only, never used in peer influence.
90
+ 5. **Conviction-based flip resistance**: Firm+ conviction agents reject position flips unless new conviction is moderate+
91
+ 6. **Semantic peer influence**: Agents see peers' public_statement + sentiment tone, NOT position labels
92
+ 7. Update state (`state.py`) — SQLite-backed with indexed tables for agent_states, exposures, memory_traces, timeline
93
+ 8. Check stopping conditions (`stopping.py`) — Compound conditions like `"exposure_rate > 0.95 and no_state_changes_for > 10"`, convergence detection via sentiment variance
94
+
95
+ **Two-pass reasoning rationale**: Single-pass reasoning caused 83% of agents to pick safe middle options (central tendency bias). Splitting role-play from classification fixes this — agents reason naturally in Pass 1, then a cheap model maps to categories in Pass 2.
96
+
97
+ **Conviction system**: Agents pick from categorical levels (`very_uncertain` / `leaning` / `moderate` / `firm` / `absolute`), mapped to floats (0.1/0.3/0.5/0.7/0.9) only for storage and threshold math. Agents never see numeric values.
98
+
99
+ **Rate limiter** (`core/rate_limiter.py`): Token bucket with dual RPM + TPM buckets. Provider-aware defaults from `core/rate_limits.py` (Anthropic/OpenAI, tiers 1-4). Replaces the old hardcoded `Semaphore(50)`. CLI flags: `--rate-tier`, config: `simulation.rate_tier`, `simulation.rpm_override`, `simulation.tpm_override`.
100
+
101
+ **Persona system** (`population/persona/` + `simulation/persona.py`): The `entropy persona` command generates a `PersonaConfig` via 5-step LLM pipeline (structure → boolean → categorical → relative → concrete phrasings). At simulation time, agents are rendered computationally using this config — no per-agent LLM calls. Relative attributes (personality, attitudes) are positioned against population stats via z-scores ("I'm much more price-sensitive than most people"). Concrete attributes use format specs for proper number/time rendering. **Trait salience**: If `decision_relevant_attributes` is set on `OutcomeConfig`, those attributes are grouped first under "Most Relevant to This Decision" in the persona.
102
+
103
+ ## LLM Integration (`entropy/core/llm.py`)
104
+
105
+ All LLM calls go through this file — never call providers directly elsewhere. Two-zone routing:
106
+
107
+ **Pipeline zone** (phases 1-2: spec, extend, persona, scenario) — configured via `entropy config set pipeline.*`:
108
+
109
+ | Function | Default Model | Tools | Use |
110
+ |----------|--------------|-------|-----|
111
+ | `simple_call()` | provider default (haiku/gpt-5-mini) | none | Sufficiency checks, simple extractions |
112
+ | `reasoning_call()` | provider default (sonnet/gpt-5) | none | Attribute selection, hydration, scenario compilation. Supports validator callback + retry |
113
+ | `agentic_research()` | provider default (sonnet/gpt-5) | web_search | Distribution hydration with real-world data. Extracts source URLs |
114
+
115
+ **Simulation zone** (phase 3: agent reasoning) — configured via `entropy config set simulation.*`:
116
+
117
+ | Function | Default Model | Tools | Use |
118
+ |----------|--------------|-------|-----|
119
+ | `simple_call_async()` | provider default | none | Pass 1 role-play reasoning + Pass 2 classification (async) |
120
+
121
+ Two-pass model routing: Pass 1 uses `simulation.model` (pivotal reasoning). Pass 2 uses `simulation.routine_model` (cheap classification). Both default to provider default if not set. CLI: `--model`, `--pivotal-model`, `--routine-model`. Standard inference only — no thinking/extended models (no o1, o3, extended thinking).
122
+
123
+ **Provider abstraction** (`entropy/core/providers/`): `LLMProvider` base class with `OpenAIProvider` and `ClaudeProvider` implementations. Factory functions `get_pipeline_provider()` and `get_simulation_provider()` read from `EntropyConfig`.
124
+
125
+ **Config** (`entropy/config.py`): `EntropyConfig` with `PipelineConfig` and `SimZoneConfig` zones. Resolution order: env vars > config file (`~/.config/entropy/config.json`) > defaults. API keys always from env vars (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`). For package use: `from entropy.config import configure, EntropyConfig`.
126
+
127
+ **Default zones**: Pipeline = Claude (population/scenario building). Simulation = OpenAI (agent reasoning). `SimZoneConfig` fields: `provider`, `model`, `pivotal_model`, `routine_model`, `max_concurrent`, `rate_tier`, `rpm_override`, `tpm_override`.
128
+
129
+ All calls use structured output (`response_format: json_schema`). Failed validations are fed back as "PREVIOUS ATTEMPT FAILED" prompts for self-correction.
130
+
131
+ ## Data Models (`entropy/core/models/`)
132
+
133
+ All Pydantic v2. Key hierarchy:
134
+
135
+ - `population.py`: `PopulationSpec` → `AttributeSpec` → `SamplingConfig` → `Distribution` / `Modifier` / `Constraint`
136
+ - `scenario.py`: `ScenarioSpec` → `Event`, `SeedExposure` (channels + rules), `InteractionConfig`, `SpreadConfig`, `OutcomeConfig`
137
+ - `simulation.py`: `ConvictionLevel`, `MemoryEntry`, `AgentState` (conviction, public_statement), `PeerOpinion` (public_statement, credibility), `ReasoningContext` (memory_trace), `ReasoningResponse` (conviction, public_statement, reasoning_summary), `SimulationRunConfig` (pivotal_model, routine_model), `TimestepSummary` (average_conviction, sentiment_variance)
138
+ - `network.py`: `Edge`, `NodeMetrics`, `NetworkMetrics`
139
+ - `validation.py`: `ValidationIssue`, `ValidationResult`
140
+
141
+ YAML serialization via `to_yaml()`/`from_yaml()` on `PopulationSpec` and `ScenarioSpec`.
142
+
143
+ ## Validation (`entropy/population/validator/`)
144
+
145
+ Two layers for population specs:
146
+ - **Structural** (`structural.py`): ERROR-level — type/modifier compatibility, range violations, distribution params, dependency cycles, condition syntax, formula references, duplicates, strategy consistency
147
+ - **Semantic** (`semantic.py`): WARNING-level — no-op detection, modifier stacking, categorical option reference validity
148
+
149
+ Scenario validation (`entropy/scenario/validator.py`): attribute reference validity, edge type references, probability ranges.
150
+
151
+ ## Key Conventions
152
+
153
+ - Conditions and formulas use restricted Python syntax via `eval_safe()` — whitelisted builtins only (abs, min, max, round, int, float, str, len, sum, all, any, bool)
154
+ - Agent IDs use the `_id` field from agent JSON, falling back to string index
155
+ - Network edges are bidirectional (stored as source/target, traversed both ways)
156
+ - Exposure credibility: `event_credibility * channel_credibility` for seed, fixed 0.85 for peer
157
+ - "Position" = first required categorical outcome (extracted in Pass 2, used for aggregation/output only — never used in peer influence)
158
+ - Peer influence is semantic: agents see neighbors' `public_statement` + sentiment tone, not position labels
159
+ - Conviction is categorical for agents (`very_uncertain`/`leaning`/`moderate`/`firm`/`absolute`), mapped to floats (0.1–0.9) only for storage/thresholds
160
+ - Memory traces: 3-entry sliding window per agent, fed back into reasoning prompts for re-reasoning
161
+ - The `persona` command generates detailed persona configs; `extend` still generates a simpler `persona_template` for backwards compatibility
162
+ - Simulation auto-detects `{population_stem}.persona.yaml` and uses the new rendering if present
163
+ - Network config defaults in `network/config.py` are currently hardcoded for the German surgeons example and need generalization
164
+
165
+ ## Tests
166
+
167
+ pytest + pytest-asyncio. Fixtures in `tests/conftest.py` include seeded RNG (`Random(42)`), minimal/complex population specs, and sample agents. Six test files covering models, network, sampler, scenario, simulation, validator.
168
+
169
+ ## File Formats
170
+
171
+ - Population/scenario specs: YAML
172
+ - Agents: JSON (array of objects with `_id`)
173
+ - Network: JSON (`{meta, nodes, edges}`)
174
+ - Simulation state: SQLite (tables: agent_states, exposures, memory_traces, timeline, timestep_summaries)
175
+ - Timeline: JSONL (streaming, crash-safe)
176
+ - Results: JSON files in output directory (agent_states.json, by_timestep.json, outcome_distributions.json, meta.json)
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025-2026 Devesh Paragiri
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,191 @@
1
+ Metadata-Version: 2.4
2
+ Name: entropy-predict
3
+ Version: 0.1.0
4
+ Summary: Predictive intelligence through agent-based population simulation
5
+ Project-URL: Homepage, https://github.com/exaforge/entropy
6
+ Project-URL: Repository, https://github.com/exaforge/entropy
7
+ Project-URL: Issues, https://github.com/exaforge/entropy/issues
8
+ Author: Devesh Paragiri
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: agent-based,llm,population,predictive-intelligence,simulation
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Requires-Python: >=3.11
22
+ Requires-Dist: anthropic>=0.77.0
23
+ Requires-Dist: fastapi>=0.115.0
24
+ Requires-Dist: networkx>=3.4.0
25
+ Requires-Dist: numpy>=2.0.0
26
+ Requires-Dist: openai>=1.50.0
27
+ Requires-Dist: pydantic>=2.9.0
28
+ Requires-Dist: python-dotenv>=1.0.0
29
+ Requires-Dist: pyyaml>=6.0.0
30
+ Requires-Dist: rich>=13.0.0
31
+ Requires-Dist: scipy>=1.14.0
32
+ Requires-Dist: typer>=0.12.0
33
+ Requires-Dist: uvicorn>=0.32.0
34
+ Provides-Extra: dev
35
+ Requires-Dist: pytest-asyncio>=0.24.0; extra == 'dev'
36
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
37
+ Requires-Dist: ruff>=0.14.0; extra == 'dev'
38
+ Description-Content-Type: text/markdown
39
+
40
+ # Entropy
41
+
42
+ Predictive intelligence through agent-based population simulation. Create synthetic populations grounded in real-world data, simulate how they respond to events, and watch opinions emerge through social networks.
43
+
44
+ Not a survey. Not a poll. A simulation of collective human behavior.
45
+
46
+ ## What It Does
47
+
48
+ You describe a population and a scenario. Entropy builds statistically grounded synthetic agents, connects them in a social network, and has each one reason individually about the event using an LLM. Opinions form, spread through the network, and evolve — producing distributional predictions you can segment and analyze.
49
+
50
+ ```
51
+ entropy spec → entropy extend → entropy sample → entropy network → entropy persona → entropy scenario → entropy simulate
52
+
53
+ entropy results
54
+ ```
55
+
56
+ ## Install
57
+
58
+ ```bash
59
+ pip install entropy-predict
60
+ ```
61
+
62
+ Or from source:
63
+
64
+ ```bash
65
+ git clone https://github.com/exaforge/entropy.git
66
+ cd entropy
67
+ pip install -e ".[dev]"
68
+ ```
69
+
70
+ ## Setup
71
+
72
+ ```bash
73
+ # API keys (in .env or exported)
74
+ export OPENAI_API_KEY=sk-...
75
+ export ANTHROPIC_API_KEY=sk-ant-...
76
+
77
+ # Configure providers
78
+ entropy config set pipeline.provider claude # Claude for population/scenario building
79
+ entropy config set simulation.provider openai # OpenAI for agent reasoning
80
+ entropy config show
81
+ ```
82
+
83
+ ## Quick Start
84
+
85
+ ```bash
86
+ # Build a population
87
+ entropy spec "500 Austin TX commuters who drive into downtown for work" -o austin/base.yaml
88
+ entropy extend austin/base.yaml -s 'Response to a $15/day downtown congestion tax' -o austin/population.yaml
89
+ entropy sample austin/population.yaml -o austin/agents.json --seed 42
90
+ entropy network austin/agents.json -o austin/network.json --seed 42
91
+ entropy persona austin/population.yaml --agents austin/agents.json
92
+
93
+ # Compile and run a scenario
94
+ entropy scenario -p austin/population.yaml -a austin/agents.json -n austin/network.json -o austin/scenario.yaml
95
+ entropy simulate austin/scenario.yaml -o austin/results/ --seed 42
96
+
97
+ # View results
98
+ entropy results austin/results/
99
+ entropy results austin/results/ --segment income
100
+ ```
101
+
102
+ ### What Comes Out
103
+
104
+ Outcomes are defined per-scenario — categorical, float, boolean, or open-ended. You choose what to measure.
105
+
106
+ ```
107
+ ═══════════════════════════════════════════════════════════
108
+ SIMULATION RESULTS: austin_congestion_tax
109
+ ═══════════════════════════════════════════════════════════
110
+
111
+ Population: 500 agents | Duration: 47 timesteps | Model: gpt-5
112
+ Stopped: exposure_rate > 0.95 and no_state_changes_for > 5
113
+
114
+ EXPOSURE
115
+ ────────────────────────────────────────
116
+ Final exposure rate: 96.8%
117
+ Reasoning calls: 1,847
118
+ Average conviction: 0.64 (moderate-to-firm)
119
+
120
+ OUTCOMES
121
+ ────────────────────────────────────────
122
+ commute_response (categorical):
123
+ drive_and_pay 38% ███████████████░░░░░
124
+ switch_to_transit 24% █████████░░░░░░░░░░░
125
+ shift_schedule 19% ███████░░░░░░░░░░░░░
126
+ telework_more 12% ████░░░░░░░░░░░░░░░░
127
+ undecided 7% ██░░░░░░░░░░░░░░░░░░
128
+
129
+ sentiment (float, -1 to 1):
130
+ mean: -0.18 std: 0.41 min: -0.9 max: 0.7
131
+
132
+ willingness_to_pay (boolean):
133
+ yes: 42% no: 58%
134
+
135
+ protest_likelihood (float, 0 to 1):
136
+ mean: 0.31 std: 0.28
137
+
138
+ SEGMENT: income
139
+ ────────────────────────────────────────
140
+ < $50k: drive_and_pay 22% | switch_to_transit 14% | protest 41%
141
+ $50-100k: drive_and_pay 40% | switch_to_transit 28% | shift_schedule 21%
142
+ > $100k: drive_and_pay 51% | switch_to_transit 31% | telework_more 14%
143
+ ```
144
+
145
+ Each agent reasoned individually. A low-income commuter with no transit access reacts differently than a tech worker near a rail stop — not because we scripted it, but because their attributes, persona, and social context led them there.
146
+
147
+ The scenario YAML controls what gets tracked:
148
+
149
+ ```yaml
150
+ outcomes:
151
+ suggested_outcomes:
152
+ - name: commute_response
153
+ type: categorical
154
+ options: [drive_and_pay, switch_to_transit, shift_schedule, telework_more, undecided]
155
+ - name: sentiment
156
+ type: float
157
+ range: [-1.0, 1.0]
158
+ - name: willingness_to_pay
159
+ type: boolean
160
+ - name: protest_likelihood
161
+ type: float
162
+ range: [0.0, 1.0]
163
+ ```
164
+
165
+ ## How It Works
166
+
167
+ **Population creation** — An LLM discovers relevant attributes (demographics, psychographics, scenario-specific), then researches real-world distributions with citations. Agents are sampled from these distributions respecting all dependencies. A social network connects them based on attribute similarity with small-world properties.
168
+
169
+ **Persona rendering** — Each agent gets a first-person narrative built from their attributes. Relative traits are positioned against population statistics ("I'm much more price-sensitive than most people"). Generated once per population, applied computationally per agent.
170
+
171
+ **Two-pass reasoning** — Pass 1: the agent role-plays their reaction in natural language (no enum labels, no anchoring). Pass 2: a cheap model classifies the freeform response into outcome categories. This eliminates the central tendency bias that plagues single-pass structured extraction.
172
+
173
+ **Network propagation** — Agents share information through social connections. Edge types, spread modifiers, and decay control how opinions travel. Multi-touch re-reasoning lets agents update their position after hearing from multiple peers.
174
+
175
+ ## Documentation
176
+
177
+ - **[CLI Reference](docs/commands.md)** — Every command with arguments, options, and examples
178
+ - **[Architecture](docs/architecture.md)** — How the system works under the hood
179
+
180
+ ## Development
181
+
182
+ ```bash
183
+ pip install -e ".[dev]"
184
+ pytest # Run tests
185
+ ruff check . # Lint
186
+ ruff format . # Format
187
+ ```
188
+
189
+ ## License
190
+
191
+ MIT
@@ -0,0 +1,152 @@
1
+ # Entropy
2
+
3
+ Predictive intelligence through agent-based population simulation. Create synthetic populations grounded in real-world data, simulate how they respond to events, and watch opinions emerge through social networks.
4
+
5
+ Not a survey. Not a poll. A simulation of collective human behavior.
6
+
7
+ ## What It Does
8
+
9
+ You describe a population and a scenario. Entropy builds statistically grounded synthetic agents, connects them in a social network, and has each one reason individually about the event using an LLM. Opinions form, spread through the network, and evolve — producing distributional predictions you can segment and analyze.
10
+
11
+ ```
12
+ entropy spec → entropy extend → entropy sample → entropy network → entropy persona → entropy scenario → entropy simulate
13
+
14
+ entropy results
15
+ ```
16
+
17
+ ## Install
18
+
19
+ ```bash
20
+ pip install entropy-predict
21
+ ```
22
+
23
+ Or from source:
24
+
25
+ ```bash
26
+ git clone https://github.com/exaforge/entropy.git
27
+ cd entropy
28
+ pip install -e ".[dev]"
29
+ ```
30
+
31
+ ## Setup
32
+
33
+ ```bash
34
+ # API keys (in .env or exported)
35
+ export OPENAI_API_KEY=sk-...
36
+ export ANTHROPIC_API_KEY=sk-ant-...
37
+
38
+ # Configure providers
39
+ entropy config set pipeline.provider claude # Claude for population/scenario building
40
+ entropy config set simulation.provider openai # OpenAI for agent reasoning
41
+ entropy config show
42
+ ```
43
+
44
+ ## Quick Start
45
+
46
+ ```bash
47
+ # Build a population
48
+ entropy spec "500 Austin TX commuters who drive into downtown for work" -o austin/base.yaml
49
+ entropy extend austin/base.yaml -s 'Response to a $15/day downtown congestion tax' -o austin/population.yaml
50
+ entropy sample austin/population.yaml -o austin/agents.json --seed 42
51
+ entropy network austin/agents.json -o austin/network.json --seed 42
52
+ entropy persona austin/population.yaml --agents austin/agents.json
53
+
54
+ # Compile and run a scenario
55
+ entropy scenario -p austin/population.yaml -a austin/agents.json -n austin/network.json -o austin/scenario.yaml
56
+ entropy simulate austin/scenario.yaml -o austin/results/ --seed 42
57
+
58
+ # View results
59
+ entropy results austin/results/
60
+ entropy results austin/results/ --segment income
61
+ ```
62
+
63
+ ### What Comes Out
64
+
65
+ Outcomes are defined per-scenario — categorical, float, boolean, or open-ended. You choose what to measure.
66
+
67
+ ```
68
+ ═══════════════════════════════════════════════════════════
69
+ SIMULATION RESULTS: austin_congestion_tax
70
+ ═══════════════════════════════════════════════════════════
71
+
72
+ Population: 500 agents | Duration: 47 timesteps | Model: gpt-5
73
+ Stopped: exposure_rate > 0.95 and no_state_changes_for > 5
74
+
75
+ EXPOSURE
76
+ ────────────────────────────────────────
77
+ Final exposure rate: 96.8%
78
+ Reasoning calls: 1,847
79
+ Average conviction: 0.64 (moderate-to-firm)
80
+
81
+ OUTCOMES
82
+ ────────────────────────────────────────
83
+ commute_response (categorical):
84
+ drive_and_pay 38% ███████████████░░░░░
85
+ switch_to_transit 24% █████████░░░░░░░░░░░
86
+ shift_schedule 19% ███████░░░░░░░░░░░░░
87
+ telework_more 12% ████░░░░░░░░░░░░░░░░
88
+ undecided 7% ██░░░░░░░░░░░░░░░░░░
89
+
90
+ sentiment (float, -1 to 1):
91
+ mean: -0.18 std: 0.41 min: -0.9 max: 0.7
92
+
93
+ willingness_to_pay (boolean):
94
+ yes: 42% no: 58%
95
+
96
+ protest_likelihood (float, 0 to 1):
97
+ mean: 0.31 std: 0.28
98
+
99
+ SEGMENT: income
100
+ ────────────────────────────────────────
101
+ < $50k: drive_and_pay 22% | switch_to_transit 14% | protest 41%
102
+ $50-100k: drive_and_pay 40% | switch_to_transit 28% | shift_schedule 21%
103
+ > $100k: drive_and_pay 51% | switch_to_transit 31% | telework_more 14%
104
+ ```
105
+
106
+ Each agent reasoned individually. A low-income commuter with no transit access reacts differently than a tech worker near a rail stop — not because we scripted it, but because their attributes, persona, and social context led them there.
107
+
108
+ The scenario YAML controls what gets tracked:
109
+
110
+ ```yaml
111
+ outcomes:
112
+ suggested_outcomes:
113
+ - name: commute_response
114
+ type: categorical
115
+ options: [drive_and_pay, switch_to_transit, shift_schedule, telework_more, undecided]
116
+ - name: sentiment
117
+ type: float
118
+ range: [-1.0, 1.0]
119
+ - name: willingness_to_pay
120
+ type: boolean
121
+ - name: protest_likelihood
122
+ type: float
123
+ range: [0.0, 1.0]
124
+ ```
125
+
126
+ ## How It Works
127
+
128
+ **Population creation** — An LLM discovers relevant attributes (demographics, psychographics, scenario-specific), then researches real-world distributions with citations. Agents are sampled from these distributions respecting all dependencies. A social network connects them based on attribute similarity with small-world properties.
129
+
130
+ **Persona rendering** — Each agent gets a first-person narrative built from their attributes. Relative traits are positioned against population statistics ("I'm much more price-sensitive than most people"). Generated once per population, applied computationally per agent.
131
+
132
+ **Two-pass reasoning** — Pass 1: the agent role-plays their reaction in natural language (no enum labels, no anchoring). Pass 2: a cheap model classifies the freeform response into outcome categories. This eliminates the central tendency bias that plagues single-pass structured extraction.
133
+
134
+ **Network propagation** — Agents share information through social connections. Edge types, spread modifiers, and decay control how opinions travel. Multi-touch re-reasoning lets agents update their position after hearing from multiple peers.
135
+
136
+ ## Documentation
137
+
138
+ - **[CLI Reference](docs/commands.md)** — Every command with arguments, options, and examples
139
+ - **[Architecture](docs/architecture.md)** — How the system works under the hood
140
+
141
+ ## Development
142
+
143
+ ```bash
144
+ pip install -e ".[dev]"
145
+ pytest # Run tests
146
+ ruff check . # Lint
147
+ ruff format . # Format
148
+ ```
149
+
150
+ ## License
151
+
152
+ MIT