resynth 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. resynth-0.1.0/.env.example +25 -0
  2. resynth-0.1.0/CHANGELOG.md +54 -0
  3. resynth-0.1.0/CONTRIBUTING.md +225 -0
  4. resynth-0.1.0/LICENSE +21 -0
  5. resynth-0.1.0/MANIFEST.in +73 -0
  6. resynth-0.1.0/Makefile +86 -0
  7. resynth-0.1.0/PKG-INFO +414 -0
  8. resynth-0.1.0/README.md +320 -0
  9. resynth-0.1.0/pyproject.toml +207 -0
  10. resynth-0.1.0/requirements.txt +21 -0
  11. resynth-0.1.0/setup.cfg +4 -0
  12. resynth-0.1.0/setup.py +107 -0
  13. resynth-0.1.0/setup_fixed.py +109 -0
  14. resynth-0.1.0/src/resynth/__init__.py +146 -0
  15. resynth-0.1.0/src/resynth/cli.py +125 -0
  16. resynth-0.1.0/src/resynth/embeddings/__init__.py +8 -0
  17. resynth-0.1.0/src/resynth/embeddings/embedding_manager.py +147 -0
  18. resynth-0.1.0/src/resynth/embeddings/vector_store.py +208 -0
  19. resynth-0.1.0/src/resynth/fetchers/__init__.py +9 -0
  20. resynth-0.1.0/src/resynth/fetchers/arxiv_fetcher.py +101 -0
  21. resynth-0.1.0/src/resynth/fetchers/base_fetcher.py +48 -0
  22. resynth-0.1.0/src/resynth/fetchers/pubmed_fetcher.py +162 -0
  23. resynth-0.1.0/src/resynth/processors/__init__.py +8 -0
  24. resynth-0.1.0/src/resynth/processors/chunker.py +169 -0
  25. resynth-0.1.0/src/resynth/processors/text_processor.py +145 -0
  26. resynth-0.1.0/src/resynth/retrieval/__init__.py +8 -0
  27. resynth-0.1.0/src/resynth/retrieval/query_processor.py +142 -0
  28. resynth-0.1.0/src/resynth/retrieval/retriever.py +218 -0
  29. resynth-0.1.0/src/resynth/synthesis/__init__.py +8 -0
  30. resynth-0.1.0/src/resynth/synthesis/answer_synthesizer.py +323 -0
  31. resynth-0.1.0/src/resynth/synthesis/citation_formatter.py +245 -0
  32. resynth-0.1.0/src/resynth/web.py +23 -0
  33. resynth-0.1.0/src/resynth.egg-info/PKG-INFO +414 -0
  34. resynth-0.1.0/src/resynth.egg-info/SOURCES.txt +41 -0
  35. resynth-0.1.0/src/resynth.egg-info/dependency_links.txt +1 -0
  36. resynth-0.1.0/src/resynth.egg-info/entry_points.txt +2 -0
  37. resynth-0.1.0/src/resynth.egg-info/not-zip-safe +1 -0
  38. resynth-0.1.0/src/resynth.egg-info/requires.txt +37 -0
  39. resynth-0.1.0/src/resynth.egg-info/top_level.txt +1 -0
  40. resynth-0.1.0/tests/__init__.py +3 -0
  41. resynth-0.1.0/tests/test_fetchers.py +122 -0
  42. resynth-0.1.0/tests/test_integration.py +175 -0
  43. resynth-0.1.0/tests/test_processors.py +110 -0
@@ -0,0 +1,25 @@
1
+ # OpenAI API Key (for embeddings and chat completion)
2
+ OPENAI_API_KEY=your_openai_api_key_here
3
+
4
+ # Alternative: Hugging Face model for embeddings
5
+ HUGGINGFACE_MODEL_NAME=sentence-transformers/all-MiniLM-L6-v2
6
+
7
+ # Vector database settings
8
+ CHROMA_PERSIST_DIRECTORY=./chroma_db
9
+ CHROMA_COLLECTION_NAME=research_papers
10
+
11
+ # Paper fetching settings
12
+ MAX_PAPERS_PER_QUERY=10
13
+ PAPER_DOWNLOAD_DIR=./papers
14
+
15
+ # API settings (0.0.0.0 binds to all network interfaces; use 127.0.0.1 for local-only access)
16
+ API_HOST=0.0.0.0
17
+ API_PORT=8000
18
+
19
+ # Chunking settings
20
+ CHUNK_SIZE=1000
21
+ CHUNK_OVERLAP=200
22
+
23
+ # Retrieval settings
24
+ TOP_K_RETRIEVAL=5
25
+ SIMILARITY_THRESHOLD=0.7
@@ -0,0 +1,54 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ### Added
11
+ - Initial release of ReSynth research paper synthesis agent
12
+ - Paper fetching from arXiv and PubMed
13
+ - Intelligent text chunking and preprocessing
14
+ - Vector embeddings with ChromaDB storage
15
+ - Query processing and retrieval system
16
+ - AI-powered answer synthesis with citations
17
+ - Multiple citation styles (APA, MLA, numeric, author-date)
18
+ - Web interface with Streamlit
19
+ - REST API with FastAPI
20
+ - Command-line interface
21
+ - Comprehensive test suite
22
+ - Documentation and examples
23
+
24
+ ### Features
25
+ - **Paper Fetching**: Search and download papers from arXiv and PubMed
26
+ - **Text Processing**: Advanced chunking, cleaning, and preprocessing
27
+ - **Vector Storage**: Efficient storage and retrieval with ChromaDB
28
+ - **Query System**: Intelligent query processing with expansion
29
+ - **Answer Synthesis**: AI-powered answers with proper citations
30
+ - **Multiple Interfaces**: Web UI, API, and CLI
31
+ - **Citation Management**: Automatic citation generation in multiple styles
32
+ - **Quality Metrics**: Retrieval quality validation and confidence scoring
33
+
34
+ ## [0.1.0] - 2024-02-14
35
+
36
+ ### Added
37
+ - Initial release
38
+ - Core functionality for research paper synthesis
39
+ - Support for arXiv and PubMed paper fetching
40
+ - Semantic text chunking
41
+ - Vector embeddings and storage
42
+ - Query processing and retrieval
43
+ - Answer synthesis with citations
44
+ - Web interface, API, and CLI
45
+ - Comprehensive documentation
46
+
47
+ ### Technical Details
48
+ - Python 3.8+ support
49
+ - Integration with OpenAI and Hugging Face models
50
+ - ChromaDB for vector storage
51
+ - FastAPI for REST API
52
+ - Streamlit for web interface
53
+ - Comprehensive test coverage
54
+ - Modern packaging with pyproject.toml
@@ -0,0 +1,225 @@
1
+ # Contributing to ReSynth
2
+
3
+ Thank you for your interest in contributing to ReSynth! This document provides guidelines for contributors.
4
+
5
+ ## Getting Started
6
+
7
+ ### Prerequisites
8
+
9
+ - Python 3.8 or higher
10
+ - Git
11
+ - Basic knowledge of Python development
12
+
13
+ ### Development Setup
14
+
15
+ 1. **Fork the repository**
16
+ ```bash
17
+ # Fork on GitHub, then clone your fork
18
+ git clone https://github.com/your-username/resynth.git
19
+ cd resynth
20
+ ```
21
+
22
+ 2. **Set up development environment**
23
+ ```bash
24
+ # Install development dependencies
25
+ make dev-install
26
+
27
+ # Set up pre-commit hooks
28
+ pre-commit install
29
+ ```
30
+
31
+ 3. **Create a virtual environment** (do this before step 2 so the dependencies install into the isolated environment)
32
+ ```bash
33
+ python -m venv venv
34
+ source venv/bin/activate # On Windows: venv\Scripts\activate
35
+ ```
36
+
37
+ ## Development Workflow
38
+
39
+ ### 1. Create a Branch
40
+
41
+ ```bash
42
+ git checkout -b feature/your-feature-name
43
+ # or
44
+ git checkout -b fix/issue-number
45
+ ```
46
+
47
+ ### 2. Make Changes
48
+
49
+ - Follow the existing code style
50
+ - Add tests for new functionality
51
+ - Update documentation as needed
52
+ - Ensure all tests pass
53
+
54
+ ### 3. Code Quality
55
+
56
+ Run the following checks before committing:
57
+
58
+ ```bash
59
+ # Format code
60
+ make format
61
+
62
+ # Run linting
63
+ make lint
64
+
65
+ # Run tests
66
+ make test
67
+ ```
68
+
69
+ ### 4. Commit Changes
70
+
71
+ Use clear, descriptive commit messages that start with a type prefix (`feat`, `fix`, `docs`, `test`):
72
+
73
+ ```
74
+ feat: add support for new paper source
75
+ fix: resolve chunking issue with large papers
76
+ docs: update API documentation
77
+ test: add integration tests for retrieval system
78
+ ```
79
+
80
+ ### 5. Submit Pull Request
81
+
82
+ - Push your branch to your fork
83
+ - Create a pull request with a clear description
84
+ - Link any relevant issues
85
+ - Wait for code review
86
+
87
+ ## Code Style
88
+
89
+ ### Python Style
90
+
91
+ We use:
92
+ - **Black** for code formatting
93
+ - **isort** for import sorting
94
+ - **flake8** for linting
95
+ - **mypy** for type checking
96
+
97
+ ### Documentation
98
+
99
+ - Use docstrings for all public functions and classes
100
+ - Follow the Google style for docstrings
101
+ - Update README.md for user-facing changes
102
+ - Add inline comments for complex logic
103
+
104
+ ### Testing
105
+
106
+ - Write unit tests for new functionality
107
+ - Add integration tests for major features
108
+ - Maintain test coverage above 80%
109
+ - Use descriptive test names
110
+
111
+ ## Project Structure
112
+
113
+ ```
114
+ resynth/
115
+ ├── src/resynth/ # Main package
116
+ │ ├── fetchers/ # Paper fetching modules
117
+ │ ├── processors/ # Text processing
118
+ │ ├── embeddings/ # Vector embeddings
119
+ │ ├── retrieval/ # Query processing
120
+ │ └── synthesis/ # Answer generation
121
+ ├── tests/ # Test suite
122
+ ├── docs/ # Documentation
123
+ ├── examples/ # Usage examples
124
+ └── scripts/ # Utility scripts
125
+ ```
126
+
127
+ ## Adding Features
128
+
129
+ ### New Paper Sources
130
+
131
+ 1. Create a new fetcher in `src/resynth/fetchers/`
132
+ 2. Inherit from `BaseFetcher`
133
+ 3. Implement required methods
134
+ 4. Add tests
135
+ 5. Update documentation
136
+
137
+ ### New Citation Styles
138
+
139
+ 1. Add formatting methods to `CitationFormatter`
140
+ 2. Update citation style options
141
+ 3. Add tests for new style
142
+ 4. Update documentation
143
+
144
+ ### New Embedding Models
145
+
146
+ 1. Update `EmbeddingManager` to support new model
147
+ 2. Add configuration options
148
+ 3. Add tests
149
+ 4. Update documentation
150
+
151
+ ## Reporting Issues
152
+
153
+ ### Bug Reports
154
+
155
+ - Use the GitHub issue tracker
156
+ - Provide clear reproduction steps
157
+ - Include environment details
158
+ - Add relevant logs or error messages
159
+
160
+ ### Feature Requests
161
+
162
+ - Describe the use case
163
+ - Explain why it's needed
164
+ - Suggest implementation approach
165
+ - Consider potential edge cases
166
+
167
+ ## Review Process
168
+
169
+ ### What We Look For
170
+
171
+ - Code quality and style
172
+ - Test coverage
173
+ - Documentation
174
+ - Performance impact
175
+ - Security considerations
176
+ - Backward compatibility
177
+
178
+ ### Review Guidelines
179
+
180
+ - Be constructive and respectful
181
+ - Focus on the code, not the person
182
+ - Provide specific suggestions
183
+ - Ask questions for clarity
184
+
185
+ ## Release Process
186
+
187
+ ### Version Management
188
+
189
+ - Follow semantic versioning
190
+ - Update version in `src/resynth/__init__.py`
191
+ - Update `CHANGELOG.md`
192
+ - Tag releases on GitHub
193
+
194
+ ### Publishing
195
+
196
+ 1. Update version and changelog
197
+ 2. Run full test suite
198
+ 3. Build package
199
+ 4. Test installation
200
+ 5. Publish to PyPI
201
+
202
+ ## Community Guidelines
203
+
204
+ ### Code of Conduct
205
+
206
+ - Be respectful and inclusive
207
+ - Welcome newcomers
208
+ - Focus on what's best for the community
209
+ - Show empathy towards other community members
210
+
211
+ ### Getting Help
212
+
213
+ - Check documentation first
214
+ - Search existing issues
215
+ - Ask questions in discussions
216
+ - Join community channels
217
+
218
+ ## Recognition
219
+
220
+ Contributors are recognized in:
221
+ - README.md contributors section
222
+ - Release notes
223
+ - GitHub contributors list
224
+
225
+ Thank you for contributing to ReSynth! 🚀
resynth-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 ReSynth Team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,73 @@
1
+ # Include additional files in the distribution
2
+
3
+ # Documentation
4
+ include README.md
5
+ include CHANGELOG.md
6
+ include CONTRIBUTING.md
7
+ include LICENSE
8
+ include MANIFEST.in
9
+
10
+ # Configuration files
11
+ include pyproject.toml
12
+ include requirements.txt
13
+ include .env.example
14
+ include Makefile
15
+
16
+ # Source files
17
+ recursive-include src *.py
18
+ recursive-include src *.yaml
19
+ recursive-include src *.yml
20
+ recursive-include src *.json
21
+ recursive-include src *.txt
22
+ recursive-include src *.md
23
+
24
+ # Test files
25
+ recursive-include tests *.py
26
+ recursive-include tests *.yaml
27
+ recursive-include tests *.json
28
+
29
+ # Static files (if any)
30
+ recursive-include src *.css
31
+ recursive-include src *.js
32
+ recursive-include src *.html
33
+ recursive-include src *.png
34
+ recursive-include src *.jpg
35
+ recursive-include src *.gif
36
+ recursive-include src *.ico
37
+
38
+ # Templates (if any)
39
+ recursive-include src *.html
40
+ recursive-include src *.jinja2
41
+
42
+ # Exclude development files
43
+ global-exclude *.pyc
44
+ global-exclude *.pyo
45
+ global-exclude *.pyd
46
+ global-exclude __pycache__
47
+ global-exclude .git*
48
+ global-exclude .DS_Store
49
+ global-exclude *.so
50
+ global-exclude .coverage
51
+ global-exclude .pytest_cache
52
+ global-exclude .mypy_cache
53
+ global-exclude .tox
54
+ global-exclude build
55
+ global-exclude dist
56
+ global-exclude *.egg-info
57
+
58
+ # Exclude environment files
59
+ exclude .env
60
+ exclude .venv
61
+ exclude venv/
62
+ exclude env/
63
+ exclude ENV/
64
+
65
+ # Exclude IDE files
66
+ exclude .vscode/
67
+ exclude .idea/
68
+ exclude *.swp
69
+ exclude *.swo
70
+
71
+ # Exclude OS and log files
72
+ exclude Thumbs.db
73
+ exclude *.log
resynth-0.1.0/Makefile ADDED
@@ -0,0 +1,86 @@
1
+ # Makefile for ReSynth
2
+
3
+ .PHONY: help install dev-install test lint format clean run-api run-cli run-web
4
+
5
+ # Default target
6
+ help:
7
+ @echo "ReSynth - Research Paper Synthesis Agent"
8
+ @echo ""
9
+ @echo "Available commands:"
10
+ @echo " install Install dependencies"
11
+ @echo " dev-install Install development dependencies"
12
+ @echo " test Run tests"
13
+ @echo " lint Run linting"
14
+ @echo " format Format code"
15
+ @echo " clean Clean temporary files"
16
+ @echo " run-api Run API server"
17
+ @echo " run-cli Run CLI example"
18
+ @echo " run-web Run web interface"
19
+ @echo ""
20
+
21
+ # Installation
22
+ install:
23
+ pip install -r requirements.txt
24
+ python -m spacy download en_core_web_sm
25
+
26
+ dev-install: install
27
+ pip install -e ".[dev]"
28
+
29
+ # Testing
30
+ test:
31
+ python -m pytest tests/ -v --cov=src --cov-report=html --cov-report=term
32
+
33
+ test-quick:
34
+ python -m pytest tests/ -v --tb=short
35
+
36
+ # Code quality
37
+ lint:
38
+ flake8 src/ tests/ main.py cli.py app.py
39
+ mypy src/ --ignore-missing-imports
40
+
41
+ format:
42
+ black src/ tests/ main.py cli.py app.py setup.py
43
+ isort src/ tests/ main.py cli.py app.py setup.py
44
+
45
+ # Cleaning
46
+ clean:
47
+ find . -type f -name "*.pyc" -delete
48
+ find . -type d -name "__pycache__" -delete
49
+ find . -type d -name "*.egg-info" -exec rm -rf {} +
50
+ rm -rf build/
51
+ rm -rf dist/
52
+ rm -rf .coverage
53
+ rm -rf htmlcov/
54
+ rm -rf .pytest_cache/
55
+ rm -rf .mypy_cache/
56
+
57
+ # Running the application
58
+ run-api:
59
+ python main.py
60
+
61
+ run-cli:
62
+ python cli.py --search "machine learning interpretability" --max-papers 3
63
+
64
+ run-web:
65
+ streamlit run app.py
66
+
67
+ # Development helpers
68
+ setup-env:
69
+ cp .env.example .env
70
+ @echo "Please edit .env file with your API keys"
71
+
72
+ check-deps:
73
+ pip check
74
+ pip list | grep -E "(fastapi|streamlit|chromadb|sentence-transformers|arxiv|openai)"
75
+
76
+ # Documentation
77
+ docs:
78
+ @echo "Documentation is available in README.md"
79
+ @echo "API docs: http://localhost:8000/docs (when API server is running)"
80
+
81
+ # Quick start
82
+ quickstart: install
83
+ @echo "Setting up quick start example..."
84
+ python cli.py --search "transformer architectures" --max-papers 2 --no-content
85
+ @echo "Now you can query with:"
86
+ @echo "python cli.py --query 'What are attention mechanisms in transformers?'"