bible-xml-parser 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bible_xml_parser-0.1.0/LICENSE +21 -0
- bible_xml_parser-0.1.0/MANIFEST.in +8 -0
- bible_xml_parser-0.1.0/PKG-INFO +302 -0
- bible_xml_parser-0.1.0/README.md +269 -0
- bible_xml_parser-0.1.0/examples/README.md +113 -0
- bible_xml_parser-0.1.0/examples/bible_small_osis.xml +64 -0
- bible_xml_parser-0.1.0/examples/bible_small_usfx.xml +58 -0
- bible_xml_parser-0.1.0/examples/database_approach.py +79 -0
- bible_xml_parser-0.1.0/examples/direct_parsing.py +69 -0
- bible_xml_parser-0.1.0/examples/search_example.py +74 -0
- bible_xml_parser-0.1.0/pyproject.toml +71 -0
- bible_xml_parser-0.1.0/setup.cfg +4 -0
- bible_xml_parser-0.1.0/src/bible_parser/__init__.py +30 -0
- bible_xml_parser-0.1.0/src/bible_parser/bible_parser.py +143 -0
- bible_xml_parser-0.1.0/src/bible_parser/bible_repository.py +367 -0
- bible_xml_parser-0.1.0/src/bible_parser/errors.py +25 -0
- bible_xml_parser-0.1.0/src/bible_parser/models.py +130 -0
- bible_xml_parser-0.1.0/src/bible_parser/parsers/__init__.py +13 -0
- bible_xml_parser-0.1.0/src/bible_parser/parsers/base_parser.py +104 -0
- bible_xml_parser-0.1.0/src/bible_parser/parsers/osis_parser.py +158 -0
- bible_xml_parser-0.1.0/src/bible_parser/parsers/usfx_parser.py +270 -0
- bible_xml_parser-0.1.0/src/bible_parser/parsers/zefania_parser.py +136 -0
- bible_xml_parser-0.1.0/src/bible_xml_parser.egg-info/PKG-INFO +302 -0
- bible_xml_parser-0.1.0/src/bible_xml_parser.egg-info/SOURCES.txt +29 -0
- bible_xml_parser-0.1.0/src/bible_xml_parser.egg-info/dependency_links.txt +1 -0
- bible_xml_parser-0.1.0/src/bible_xml_parser.egg-info/requires.txt +11 -0
- bible_xml_parser-0.1.0/src/bible_xml_parser.egg-info/top_level.txt +1 -0
- bible_xml_parser-0.1.0/tests/__init__.py +1 -0
- bible_xml_parser-0.1.0/tests/test_bible_parser.py +80 -0
- bible_xml_parser-0.1.0/tests/test_models.py +113 -0
- bible_xml_parser-0.1.0/tests/test_parsers.py +232 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Bible Parser Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: bible-xml-parser
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Python package for parsing Bible texts in various XML formats (USFX, OSIS, ZEFANIA)
|
|
5
|
+
Author: Omar Zintan
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Omarzintan/bible_parser_python
|
|
8
|
+
Project-URL: Repository, https://github.com/Omarzintan/bible_parser_python
|
|
9
|
+
Project-URL: Issues, https://github.com/Omarzintan/bible_parser_python/issues
|
|
10
|
+
Keywords: bible,parser,xml,usfx,osis,zefania,scripture
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Religion
|
|
20
|
+
Classifier: Topic :: Text Processing :: Markup :: XML
|
|
21
|
+
Requires-Python: >=3.8
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: defusedxml>=0.7.1
|
|
25
|
+
Requires-Dist: typing-extensions>=4.0.0; python_version < "3.9"
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
28
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
29
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
30
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
31
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
|
|
34
|
+
# Bible XML Parser
|
|
35
|
+
|
|
36
|
+
A Python package for parsing Bible texts in various XML formats (USFX, OSIS, ZEFANIA). This package provides both direct parsing and database-backed approaches for handling Bible data in your Python applications.
|
|
37
|
+
|
|
38
|
+
## Features
|
|
39
|
+
|
|
40
|
+
- 📖 Parse Bible texts in multiple formats (USFX, OSIS, ZEFANIA)
|
|
41
|
+
- 🔍 Automatic format detection
|
|
42
|
+
- 🚀 Memory-efficient streaming XML parsing using defusedxml
|
|
43
|
+
- 🗄️ SQLite database caching for improved performance
|
|
44
|
+
- 🔎 Full-text search functionality (FTS5)
|
|
45
|
+
- 🔒 Secure XML parsing (protected against XXE attacks)
|
|
46
|
+
- 📝 Type hints throughout for better IDE support
|
|
47
|
+
- 🐍 Python 3.8+ support
|
|
48
|
+
|
|
49
|
+
## Installation
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
pip install bible-xml-parser
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### Development Installation
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
git clone https://github.com/Omarzintan/bible_parser_python.git
|
|
59
|
+
cd bible_parser_python
|
|
60
|
+
pip install -e ".[dev]"
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Quick Start
|
|
64
|
+
|
|
65
|
+
### Direct Parsing Approach
|
|
66
|
+
|
|
67
|
+
Parse a Bible file directly without database caching:
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from bible_parser import BibleParser
|
|
71
|
+
|
|
72
|
+
# Parse from file (format auto-detected)
|
|
73
|
+
parser = BibleParser('path/to/bible.xml')
|
|
74
|
+
|
|
75
|
+
# Or parse from string with explicit format
|
|
76
|
+
xml_content = open('bible.xml').read()
|
|
77
|
+
parser = BibleParser.from_string(xml_content, format='USFX')
|
|
78
|
+
|
|
79
|
+
# Iterate over books
|
|
80
|
+
for book in parser.books:
|
|
81
|
+
print(f"{book.title} ({book.id})")
|
|
82
|
+
print(f" Chapters: {len(book.chapters)}")
|
|
83
|
+
print(f" Verses: {len(book.verses)}")
|
|
84
|
+
|
|
85
|
+
# Or iterate over verses directly
|
|
86
|
+
for verse in parser.verses:
|
|
87
|
+
print(f"{verse.book_id} {verse.chapter_num}:{verse.num} - {verse.text}")
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Database Approach (Recommended for Production)
|
|
91
|
+
|
|
92
|
+
For better performance, use the database approach:
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
from bible_parser import BibleRepository
|
|
96
|
+
|
|
97
|
+
# Create repository
|
|
98
|
+
repo = BibleRepository(xml_path='path/to/bible.xml', format='USFX')
|
|
99
|
+
|
|
100
|
+
# Initialize database (only needed once)
|
|
101
|
+
repo.initialize('my_bible.db')
|
|
102
|
+
|
|
103
|
+
# Get all books
|
|
104
|
+
books = repo.get_books()
|
|
105
|
+
for book in books:
|
|
106
|
+
print(f"{book.title} ({book.id})")
|
|
107
|
+
|
|
108
|
+
# Get verses from a specific chapter
|
|
109
|
+
verses = repo.get_verses('gen', 1) # Genesis chapter 1
|
|
110
|
+
for verse in verses:
|
|
111
|
+
print(f"{verse.num}. {verse.text}")
|
|
112
|
+
|
|
113
|
+
# Get a specific verse
|
|
114
|
+
verse = repo.get_verse('jhn', 3, 16) # John 3:16
|
|
115
|
+
if verse:
|
|
116
|
+
print(verse.text)
|
|
117
|
+
|
|
118
|
+
# Search for verses containing specific text
|
|
119
|
+
results = repo.search_verses('love')
|
|
120
|
+
print(f"Found {len(results)} verses containing 'love'")
|
|
121
|
+
|
|
122
|
+
# Don't forget to close
|
|
123
|
+
repo.close()
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### Using Context Manager
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
from bible_parser import BibleRepository
|
|
130
|
+
|
|
131
|
+
with BibleRepository(xml_path='bible.xml') as repo:
|
|
132
|
+
repo.initialize('my_bible.db')
|
|
133
|
+
|
|
134
|
+
# Use the repository
|
|
135
|
+
verses = repo.get_verses('mat', 5) # Matthew chapter 5
|
|
136
|
+
for verse in verses:
|
|
137
|
+
print(f"{verse.num}. {verse.text}")
|
|
138
|
+
|
|
139
|
+
# Search
|
|
140
|
+
results = repo.search_verses('faith hope love')
|
|
141
|
+
for verse in results:
|
|
142
|
+
print(f"{verse.book_id} {verse.chapter_num}:{verse.num}")
|
|
143
|
+
|
|
144
|
+
# Database automatically closed
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## Supported Formats
|
|
148
|
+
|
|
149
|
+
### USFX (Unified Scripture Format XML)
|
|
150
|
+
```xml
|
|
151
|
+
<usfx>
|
|
152
|
+
<book id="gen">
|
|
153
|
+
<c id="1"/>
|
|
154
|
+
<v id="1">In the beginning...</v>
|
|
155
|
+
</book>
|
|
156
|
+
</usfx>
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### OSIS (Open Scripture Information Standard)
|
|
160
|
+
```xml
|
|
161
|
+
<osis>
|
|
162
|
+
<osisText>
|
|
163
|
+
<div type="book" osisID="Gen">
|
|
164
|
+
<verse osisID="Gen.1.1">In the beginning...</verse>
|
|
165
|
+
</div>
|
|
166
|
+
</osisText>
|
|
167
|
+
</osis>
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### Zefania XML
|
|
171
|
+
```xml
|
|
172
|
+
<XMLBIBLE>
|
|
173
|
+
<BIBLEBOOK bnumber="1" bname="Genesis">
|
|
174
|
+
<CHAPTER cnumber="1">
|
|
175
|
+
<VERS vnumber="1">In the beginning...</VERS>
|
|
176
|
+
</CHAPTER>
|
|
177
|
+
</BIBLEBOOK>
|
|
178
|
+
</XMLBIBLE>
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## API Reference
|
|
182
|
+
|
|
183
|
+
### BibleParser
|
|
184
|
+
|
|
185
|
+
Main parser class with automatic format detection.
|
|
186
|
+
|
|
187
|
+
**Methods and properties:**
|
|
188
|
+
- `__init__(source, format=None)` - Initialize parser
|
|
189
|
+
- `from_string(xml_content, format=None)` - Create from XML string
|
|
190
|
+
- `books` - Property that yields Book objects
|
|
191
|
+
- `verses` - Property that yields Verse objects
|
|
192
|
+
|
|
193
|
+
### BibleRepository
|
|
194
|
+
|
|
195
|
+
Database-backed repository for efficient Bible data access.
|
|
196
|
+
|
|
197
|
+
**Methods:**
|
|
198
|
+
- `__init__(xml_path=None, xml_string=None, format=None)` - Initialize repository
|
|
199
|
+
- `initialize(database_name)` - Create/open database
|
|
200
|
+
- `get_books()` - Get all books
|
|
201
|
+
- `get_verses(book_id, chapter_num)` - Get verses from a chapter
|
|
202
|
+
- `get_verse(book_id, chapter_num, verse_num)` - Get a specific verse
|
|
203
|
+
- `get_chapter_count(book_id)` - Get number of chapters in a book
|
|
204
|
+
- `search_verses(query, limit=100)` - Full-text search
|
|
205
|
+
- `close()` - Close database connection
|
|
206
|
+
|
|
207
|
+
### Data Models
|
|
208
|
+
|
|
209
|
+
**Verse:**
|
|
210
|
+
- `num` (int) - Verse number
|
|
211
|
+
- `chapter_num` (int) - Chapter number
|
|
212
|
+
- `text` (str) - Verse text
|
|
213
|
+
- `book_id` (str) - Book identifier
|
|
214
|
+
|
|
215
|
+
**Chapter:**
|
|
216
|
+
- `num` (int) - Chapter number
|
|
217
|
+
- `verses` (List[Verse]) - List of verses
|
|
218
|
+
|
|
219
|
+
**Book:**
|
|
220
|
+
- `id` (str) - Book identifier (e.g., 'gen', 'mat')
|
|
221
|
+
- `num` (int) - Book number
|
|
222
|
+
- `title` (str) - Book title (e.g., 'Genesis', 'Matthew')
|
|
223
|
+
- `chapters` (List[Chapter]) - List of chapters
|
|
224
|
+
- `verses` (List[Verse]) - Flat list of all verses
|
|
225
|
+
|
|
226
|
+
## Performance Considerations
|
|
227
|
+
|
|
228
|
+
### Direct Parsing
|
|
229
|
+
**Pros:**
|
|
230
|
+
- Simple implementation
|
|
231
|
+
- No database setup required
|
|
232
|
+
- Always uses the latest source files
|
|
233
|
+
|
|
234
|
+
**Cons:**
|
|
235
|
+
- CPU and memory intensive
|
|
236
|
+
- Slower for repeated access
|
|
237
|
+
- Repeated parsing on each run
|
|
238
|
+
|
|
239
|
+
### Database Approach
|
|
240
|
+
**Pros:**
|
|
241
|
+
- Much faster access once data is loaded
|
|
242
|
+
- Lower memory usage during queries
|
|
243
|
+
- Efficient full-text search with FTS5
|
|
244
|
+
- Works offline without re-parsing
|
|
245
|
+
|
|
246
|
+
**Cons:**
|
|
247
|
+
- Initial setup time
|
|
248
|
+
- Requires disk space
|
|
249
|
+
- Additional complexity
|
|
250
|
+
|
|
251
|
+
## Security
|
|
252
|
+
|
|
253
|
+
This package uses `defusedxml` for secure XML parsing, protecting against:
|
|
254
|
+
- **XXE (XML External Entity) attacks** - Prevents reading local files or making network requests
|
|
255
|
+
- **Billion Laughs attack** - Prevents exponential entity expansion
|
|
256
|
+
- **Quadratic blowup** - Prevents memory exhaustion
|
|
257
|
+
|
|
258
|
+
All database queries use parameterized statements to prevent SQL injection.
|
|
259
|
+
|
|
260
|
+
## Examples
|
|
261
|
+
|
|
262
|
+
See the `examples/` directory for complete working examples:
|
|
263
|
+
- `direct_parsing.py` - Direct parsing example
|
|
264
|
+
- `database_approach.py` - Database caching example
|
|
265
|
+
- `search_example.py` - Full-text search example
|
|
266
|
+
|
|
267
|
+
## Testing
|
|
268
|
+
|
|
269
|
+
Run tests with pytest:
|
|
270
|
+
|
|
271
|
+
```bash
|
|
272
|
+
pytest
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
With coverage:
|
|
276
|
+
|
|
277
|
+
```bash
|
|
278
|
+
pytest --cov=bible_parser --cov-report=term-missing
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
## Contributing
|
|
282
|
+
|
|
283
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
284
|
+
|
|
285
|
+
## License
|
|
286
|
+
|
|
287
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
288
|
+
|
|
289
|
+
## Acknowledgments
|
|
290
|
+
|
|
291
|
+
- Inspired by the Ruby [bible_parser](https://github.com/seven1m/bible_parser) library
|
|
292
|
+
- Flutter [bible_parser_flutter](../bible_parser_flutter/) implementation
|
|
293
|
+
- Bible XML files from the [open-bibles](https://github.com/seven1m/open-bibles) repository
|
|
294
|
+
|
|
295
|
+
## Changelog
|
|
296
|
+
|
|
297
|
+
See [CHANGELOG.md](CHANGELOG.md) for version history.
|
|
298
|
+
|
|
299
|
+
## Support
|
|
300
|
+
|
|
301
|
+
- 📫 Issues: [GitHub Issues](https://github.com/Omarzintan/bible_parser_python/issues)
|
|
302
|
+
- 📖 Documentation: [GitHub Wiki](https://github.com/Omarzintan/bible_parser_python/wiki)
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
# Bible XML Parser
|
|
2
|
+
|
|
3
|
+
A Python package for parsing Bible texts in various XML formats (USFX, OSIS, ZEFANIA). This package provides both direct parsing and database-backed approaches for handling Bible data in your Python applications.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- 📖 Parse Bible texts in multiple formats (USFX, OSIS, ZEFANIA)
|
|
8
|
+
- 🔍 Automatic format detection
|
|
9
|
+
- 🚀 Memory-efficient streaming XML parsing using defusedxml
|
|
10
|
+
- 🗄️ SQLite database caching for improved performance
|
|
11
|
+
- 🔎 Full-text search functionality (FTS5)
|
|
12
|
+
- 🔒 Secure XML parsing (protected against XXE attacks)
|
|
13
|
+
- 📝 Type hints throughout for better IDE support
|
|
14
|
+
- 🐍 Python 3.8+ support
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install bible-xml-parser
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
### Development Installation
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
git clone https://github.com/Omarzintan/bible_parser_python.git
|
|
26
|
+
cd bible_parser_python
|
|
27
|
+
pip install -e ".[dev]"
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Quick Start
|
|
31
|
+
|
|
32
|
+
### Direct Parsing Approach
|
|
33
|
+
|
|
34
|
+
Parse a Bible file directly without database caching:
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
from bible_parser import BibleParser
|
|
38
|
+
|
|
39
|
+
# Parse from file (format auto-detected)
|
|
40
|
+
parser = BibleParser('path/to/bible.xml')
|
|
41
|
+
|
|
42
|
+
# Or parse from string with explicit format
|
|
43
|
+
xml_content = open('bible.xml').read()
|
|
44
|
+
parser = BibleParser.from_string(xml_content, format='USFX')
|
|
45
|
+
|
|
46
|
+
# Iterate over books
|
|
47
|
+
for book in parser.books:
|
|
48
|
+
print(f"{book.title} ({book.id})")
|
|
49
|
+
print(f" Chapters: {len(book.chapters)}")
|
|
50
|
+
print(f" Verses: {len(book.verses)}")
|
|
51
|
+
|
|
52
|
+
# Or iterate over verses directly
|
|
53
|
+
for verse in parser.verses:
|
|
54
|
+
print(f"{verse.book_id} {verse.chapter_num}:{verse.num} - {verse.text}")
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Database Approach (Recommended for Production)
|
|
58
|
+
|
|
59
|
+
For better performance, use the database approach:
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
from bible_parser import BibleRepository
|
|
63
|
+
|
|
64
|
+
# Create repository
|
|
65
|
+
repo = BibleRepository(xml_path='path/to/bible.xml', format='USFX')
|
|
66
|
+
|
|
67
|
+
# Initialize database (only needed once)
|
|
68
|
+
repo.initialize('my_bible.db')
|
|
69
|
+
|
|
70
|
+
# Get all books
|
|
71
|
+
books = repo.get_books()
|
|
72
|
+
for book in books:
|
|
73
|
+
print(f"{book.title} ({book.id})")
|
|
74
|
+
|
|
75
|
+
# Get verses from a specific chapter
|
|
76
|
+
verses = repo.get_verses('gen', 1) # Genesis chapter 1
|
|
77
|
+
for verse in verses:
|
|
78
|
+
print(f"{verse.num}. {verse.text}")
|
|
79
|
+
|
|
80
|
+
# Get a specific verse
|
|
81
|
+
verse = repo.get_verse('jhn', 3, 16) # John 3:16
|
|
82
|
+
if verse:
|
|
83
|
+
print(verse.text)
|
|
84
|
+
|
|
85
|
+
# Search for verses containing specific text
|
|
86
|
+
results = repo.search_verses('love')
|
|
87
|
+
print(f"Found {len(results)} verses containing 'love'")
|
|
88
|
+
|
|
89
|
+
# Don't forget to close
|
|
90
|
+
repo.close()
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Using Context Manager
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from bible_parser import BibleRepository
|
|
97
|
+
|
|
98
|
+
with BibleRepository(xml_path='bible.xml') as repo:
|
|
99
|
+
repo.initialize('my_bible.db')
|
|
100
|
+
|
|
101
|
+
# Use the repository
|
|
102
|
+
verses = repo.get_verses('mat', 5) # Matthew chapter 5
|
|
103
|
+
for verse in verses:
|
|
104
|
+
print(f"{verse.num}. {verse.text}")
|
|
105
|
+
|
|
106
|
+
# Search
|
|
107
|
+
results = repo.search_verses('faith hope love')
|
|
108
|
+
for verse in results:
|
|
109
|
+
print(f"{verse.book_id} {verse.chapter_num}:{verse.num}")
|
|
110
|
+
|
|
111
|
+
# Database automatically closed
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Supported Formats
|
|
115
|
+
|
|
116
|
+
### USFX (Unified Scripture Format XML)
|
|
117
|
+
```xml
|
|
118
|
+
<usfx>
|
|
119
|
+
<book id="gen">
|
|
120
|
+
<c id="1"/>
|
|
121
|
+
<v id="1">In the beginning...</v>
|
|
122
|
+
</book>
|
|
123
|
+
</usfx>
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### OSIS (Open Scripture Information Standard)
|
|
127
|
+
```xml
|
|
128
|
+
<osis>
|
|
129
|
+
<osisText>
|
|
130
|
+
<div type="book" osisID="Gen">
|
|
131
|
+
<verse osisID="Gen.1.1">In the beginning...</verse>
|
|
132
|
+
</div>
|
|
133
|
+
</osisText>
|
|
134
|
+
</osis>
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Zefania XML
|
|
138
|
+
```xml
|
|
139
|
+
<XMLBIBLE>
|
|
140
|
+
<BIBLEBOOK bnumber="1" bname="Genesis">
|
|
141
|
+
<CHAPTER cnumber="1">
|
|
142
|
+
<VERS vnumber="1">In the beginning...</VERS>
|
|
143
|
+
</CHAPTER>
|
|
144
|
+
</BIBLEBOOK>
|
|
145
|
+
</XMLBIBLE>
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## API Reference
|
|
149
|
+
|
|
150
|
+
### BibleParser
|
|
151
|
+
|
|
152
|
+
Main parser class with automatic format detection.
|
|
153
|
+
|
|
154
|
+
**Methods and properties:**
|
|
155
|
+
- `__init__(source, format=None)` - Initialize parser
|
|
156
|
+
- `from_string(xml_content, format=None)` - Create from XML string
|
|
157
|
+
- `books` - Property that yields Book objects
|
|
158
|
+
- `verses` - Property that yields Verse objects
|
|
159
|
+
|
|
160
|
+
### BibleRepository
|
|
161
|
+
|
|
162
|
+
Database-backed repository for efficient Bible data access.
|
|
163
|
+
|
|
164
|
+
**Methods:**
|
|
165
|
+
- `__init__(xml_path=None, xml_string=None, format=None)` - Initialize repository
|
|
166
|
+
- `initialize(database_name)` - Create/open database
|
|
167
|
+
- `get_books()` - Get all books
|
|
168
|
+
- `get_verses(book_id, chapter_num)` - Get verses from a chapter
|
|
169
|
+
- `get_verse(book_id, chapter_num, verse_num)` - Get a specific verse
|
|
170
|
+
- `get_chapter_count(book_id)` - Get number of chapters in a book
|
|
171
|
+
- `search_verses(query, limit=100)` - Full-text search
|
|
172
|
+
- `close()` - Close database connection
|
|
173
|
+
|
|
174
|
+
### Data Models
|
|
175
|
+
|
|
176
|
+
**Verse:**
|
|
177
|
+
- `num` (int) - Verse number
|
|
178
|
+
- `chapter_num` (int) - Chapter number
|
|
179
|
+
- `text` (str) - Verse text
|
|
180
|
+
- `book_id` (str) - Book identifier
|
|
181
|
+
|
|
182
|
+
**Chapter:**
|
|
183
|
+
- `num` (int) - Chapter number
|
|
184
|
+
- `verses` (List[Verse]) - List of verses
|
|
185
|
+
|
|
186
|
+
**Book:**
|
|
187
|
+
- `id` (str) - Book identifier (e.g., 'gen', 'mat')
|
|
188
|
+
- `num` (int) - Book number
|
|
189
|
+
- `title` (str) - Book title (e.g., 'Genesis', 'Matthew')
|
|
190
|
+
- `chapters` (List[Chapter]) - List of chapters
|
|
191
|
+
- `verses` (List[Verse]) - Flat list of all verses
|
|
192
|
+
|
|
193
|
+
## Performance Considerations
|
|
194
|
+
|
|
195
|
+
### Direct Parsing
|
|
196
|
+
**Pros:**
|
|
197
|
+
- Simple implementation
|
|
198
|
+
- No database setup required
|
|
199
|
+
- Always uses the latest source files
|
|
200
|
+
|
|
201
|
+
**Cons:**
|
|
202
|
+
- CPU and memory intensive
|
|
203
|
+
- Slower for repeated access
|
|
204
|
+
- Repeated parsing on each run
|
|
205
|
+
|
|
206
|
+
### Database Approach
|
|
207
|
+
**Pros:**
|
|
208
|
+
- Much faster access once data is loaded
|
|
209
|
+
- Lower memory usage during queries
|
|
210
|
+
- Efficient full-text search with FTS5
|
|
211
|
+
- Works offline without re-parsing
|
|
212
|
+
|
|
213
|
+
**Cons:**
|
|
214
|
+
- Initial setup time
|
|
215
|
+
- Requires disk space
|
|
216
|
+
- Additional complexity
|
|
217
|
+
|
|
218
|
+
## Security
|
|
219
|
+
|
|
220
|
+
This package uses `defusedxml` for secure XML parsing, protecting against:
|
|
221
|
+
- **XXE (XML External Entity) attacks** - Prevents reading local files or making network requests
|
|
222
|
+
- **Billion Laughs attack** - Prevents exponential entity expansion
|
|
223
|
+
- **Quadratic blowup** - Prevents memory exhaustion
|
|
224
|
+
|
|
225
|
+
All database queries use parameterized statements to prevent SQL injection.
|
|
226
|
+
|
|
227
|
+
## Examples
|
|
228
|
+
|
|
229
|
+
See the `examples/` directory for complete working examples:
|
|
230
|
+
- `direct_parsing.py` - Direct parsing example
|
|
231
|
+
- `database_approach.py` - Database caching example
|
|
232
|
+
- `search_example.py` - Full-text search example
|
|
233
|
+
|
|
234
|
+
## Testing
|
|
235
|
+
|
|
236
|
+
Run tests with pytest:
|
|
237
|
+
|
|
238
|
+
```bash
|
|
239
|
+
pytest
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
With coverage:
|
|
243
|
+
|
|
244
|
+
```bash
|
|
245
|
+
pytest --cov=bible_parser --cov-report=term-missing
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
## Contributing
|
|
249
|
+
|
|
250
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
251
|
+
|
|
252
|
+
## License
|
|
253
|
+
|
|
254
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
255
|
+
|
|
256
|
+
## Acknowledgments
|
|
257
|
+
|
|
258
|
+
- Inspired by the Ruby [bible_parser](https://github.com/seven1m/bible_parser) library
|
|
259
|
+
- Flutter [bible_parser_flutter](../bible_parser_flutter/) implementation
|
|
260
|
+
- Bible XML files from the [open-bibles](https://github.com/seven1m/open-bibles) repository
|
|
261
|
+
|
|
262
|
+
## Changelog
|
|
263
|
+
|
|
264
|
+
See [CHANGELOG.md](CHANGELOG.md) for version history.
|
|
265
|
+
|
|
266
|
+
## Support
|
|
267
|
+
|
|
268
|
+
- 📫 Issues: [GitHub Issues](https://github.com/Omarzintan/bible_parser_python/issues)
|
|
269
|
+
- 📖 Documentation: [GitHub Wiki](https://github.com/Omarzintan/bible_parser_python/wiki)
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# Bible Parser Examples
|
|
2
|
+
|
|
3
|
+
This directory contains example scripts demonstrating how to use the bible_parser package.
|
|
4
|
+
|
|
5
|
+
## Sample Files
|
|
6
|
+
|
|
7
|
+
- **`bible_small_usfx.xml`** - Small sample Bible in USFX format (Genesis 1-2)
|
|
8
|
+
- **`bible_small_osis.xml`** - Small sample Bible in OSIS format (Genesis 1-2)
|
|
9
|
+
|
|
10
|
+
## Example Scripts
|
|
11
|
+
|
|
12
|
+
### 1. Direct Parsing (`direct_parsing.py`)
|
|
13
|
+
|
|
14
|
+
Demonstrates parsing Bible XML files directly without database caching.
|
|
15
|
+
|
|
16
|
+
**Run:**
|
|
17
|
+
```bash
|
|
18
|
+
cd examples
|
|
19
|
+
python direct_parsing.py
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
**Features:**
|
|
23
|
+
- Auto-detect Bible format
|
|
24
|
+
- Iterate over books
|
|
25
|
+
- Access verses directly
|
|
26
|
+
- Parse from string
|
|
27
|
+
|
|
28
|
+
### 2. Database Approach (`database_approach.py`)
|
|
29
|
+
|
|
30
|
+
Demonstrates using SQLite database for caching and fast access.
|
|
31
|
+
|
|
32
|
+
**Run:**
|
|
33
|
+
```bash
|
|
34
|
+
cd examples
|
|
35
|
+
python database_approach.py
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
**Features:**
|
|
39
|
+
- Database initialization
|
|
40
|
+
- List all books
|
|
41
|
+
- Get verses from specific chapters
|
|
42
|
+
- Get specific verses
|
|
43
|
+
- Full-text search
|
|
44
|
+
|
|
45
|
+
### 3. Search Example (`search_example.py`)
|
|
46
|
+
|
|
47
|
+
Demonstrates full-text search capabilities using SQLite FTS5.
|
|
48
|
+
|
|
49
|
+
**Run:**
|
|
50
|
+
```bash
|
|
51
|
+
cd examples
|
|
52
|
+
python search_example.py
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
**Features:**
|
|
56
|
+
- Simple text search
|
|
57
|
+
- Multi-word search
|
|
58
|
+
- Search in specific books
|
|
59
|
+
- Search statistics
|
|
60
|
+
|
|
61
|
+
## Using Your Own Bible Files
|
|
62
|
+
|
|
63
|
+
To use your own Bible XML files, simply change the `xml_file` variable in any example:
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
# Use your own file
|
|
67
|
+
xml_file = "/path/to/your/bible.xml"
|
|
68
|
+
|
|
69
|
+
# Or use one of the sample files
|
|
70
|
+
xml_file = "bible_small_usfx.xml"
|
|
71
|
+
xml_file = "bible_small_osis.xml"
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Supported Formats
|
|
75
|
+
|
|
76
|
+
The parser automatically detects these formats:
|
|
77
|
+
- **USFX** - Unified Scripture Format XML
|
|
78
|
+
- **OSIS** - Open Scripture Information Standard
|
|
79
|
+
- **ZEFANIA** - Zefania XML Bible Markup Language
|
|
80
|
+
|
|
81
|
+
## Output
|
|
82
|
+
|
|
83
|
+
Each example will:
|
|
84
|
+
1. Parse the Bible file
|
|
85
|
+
2. Display information about books, chapters, and verses
|
|
86
|
+
3. Demonstrate specific features
|
|
87
|
+
4. Clean up resources automatically
|
|
88
|
+
|
|
89
|
+
## Database Files
|
|
90
|
+
|
|
91
|
+
The database examples create a `my_bible.db` file in the examples directory. This file can be reused across runs for faster access. Delete it to re-import from XML.
|
|
92
|
+
|
|
93
|
+
## Requirements
|
|
94
|
+
|
|
95
|
+
Make sure the bible_parser package is installed:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
cd ..
|
|
99
|
+
pip install -e .
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Getting More Bible Files
|
|
103
|
+
|
|
104
|
+
You can find more Bible XML files at:
|
|
105
|
+
- [Open Bibles Repository](https://github.com/seven1m/open-bibles)
|
|
106
|
+
- Various Bible translation websites
|
|
107
|
+
|
|
108
|
+
## Notes
|
|
109
|
+
|
|
110
|
+
- The sample files contain only Genesis chapters 1-2 for demonstration purposes
|
|
111
|
+
- For production use, download complete Bible translations
|
|
112
|
+
- The database approach is recommended for applications that need repeated access
|
|
113
|
+
- The direct parsing approach is good for one-time processing or when you need the latest data
|