barscan 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. barscan-0.2.2/.env.example +7 -0
  2. barscan-0.2.2/.github/workflows/ci.yml +37 -0
  3. barscan-0.2.2/.github/workflows/release.yml +72 -0
  4. barscan-0.2.2/.gitignore +84 -0
  5. barscan-0.2.2/LICENSE +21 -0
  6. barscan-0.2.2/PKG-INFO +354 -0
  7. barscan-0.2.2/README.md +315 -0
  8. barscan-0.2.2/pyproject.toml +81 -0
  9. barscan-0.2.2/src/barscan/__init__.py +30 -0
  10. barscan-0.2.2/src/barscan/__main__.py +6 -0
  11. barscan-0.2.2/src/barscan/analyzer/__init__.py +71 -0
  12. barscan-0.2.2/src/barscan/analyzer/context.py +172 -0
  13. barscan-0.2.2/src/barscan/analyzer/filters.py +218 -0
  14. barscan-0.2.2/src/barscan/analyzer/frequency.py +260 -0
  15. barscan-0.2.2/src/barscan/analyzer/models.py +186 -0
  16. barscan-0.2.2/src/barscan/analyzer/nltk_resources.py +80 -0
  17. barscan-0.2.2/src/barscan/analyzer/pos.py +134 -0
  18. barscan-0.2.2/src/barscan/analyzer/processor.py +341 -0
  19. barscan-0.2.2/src/barscan/analyzer/sentiment.py +110 -0
  20. barscan-0.2.2/src/barscan/analyzer/slang.py +240 -0
  21. barscan-0.2.2/src/barscan/analyzer/stopwords_ja.py +23 -0
  22. barscan-0.2.2/src/barscan/analyzer/tfidf.py +146 -0
  23. barscan-0.2.2/src/barscan/analyzer/tokenizer.py +282 -0
  24. barscan-0.2.2/src/barscan/cli.py +528 -0
  25. barscan-0.2.2/src/barscan/config.py +57 -0
  26. barscan-0.2.2/src/barscan/exceptions.py +91 -0
  27. barscan-0.2.2/src/barscan/genius/__init__.py +15 -0
  28. barscan-0.2.2/src/barscan/genius/cache.py +195 -0
  29. barscan-0.2.2/src/barscan/genius/client.py +364 -0
  30. barscan-0.2.2/src/barscan/genius/models.py +73 -0
  31. barscan-0.2.2/src/barscan/logging.py +48 -0
  32. barscan-0.2.2/src/barscan/output/__init__.py +25 -0
  33. barscan-0.2.2/src/barscan/output/wordgrain.py +344 -0
  34. barscan-0.2.2/src/barscan/py.typed +0 -0
  35. barscan-0.2.2/tests/__init__.py +1 -0
  36. barscan-0.2.2/tests/test_analyzer/__init__.py +1 -0
  37. barscan-0.2.2/tests/test_analyzer/conftest.py +66 -0
  38. barscan-0.2.2/tests/test_analyzer/test_context.py +271 -0
  39. barscan-0.2.2/tests/test_analyzer/test_filters.py +337 -0
  40. barscan-0.2.2/tests/test_analyzer/test_frequency.py +475 -0
  41. barscan-0.2.2/tests/test_analyzer/test_models.py +222 -0
  42. barscan-0.2.2/tests/test_analyzer/test_pos.py +95 -0
  43. barscan-0.2.2/tests/test_analyzer/test_processor.py +456 -0
  44. barscan-0.2.2/tests/test_analyzer/test_sentiment.py +119 -0
  45. barscan-0.2.2/tests/test_analyzer/test_slang.py +139 -0
  46. barscan-0.2.2/tests/test_analyzer/test_tfidf.py +178 -0
  47. barscan-0.2.2/tests/test_analyzer/test_tokenizer.py +457 -0
  48. barscan-0.2.2/tests/test_cli/__init__.py +1 -0
  49. barscan-0.2.2/tests/test_cli/conftest.py +90 -0
  50. barscan-0.2.2/tests/test_cli/test_commands.py +654 -0
  51. barscan-0.2.2/tests/test_genius/__init__.py +1 -0
  52. barscan-0.2.2/tests/test_genius/conftest.py +81 -0
  53. barscan-0.2.2/tests/test_genius/test_cache.py +463 -0
  54. barscan-0.2.2/tests/test_genius/test_client.py +553 -0
  55. barscan-0.2.2/tests/test_genius/test_models.py +181 -0
  56. barscan-0.2.2/tests/test_main.py +34 -0
  57. barscan-0.2.2/tests/test_output/__init__.py +1 -0
  58. barscan-0.2.2/tests/test_output/test_wordgrain.py +496 -0
@@ -0,0 +1,7 @@
1
+ # Genius API Access Token
2
+ # Get your token at: https://genius.com/api-clients
3
+ BARSCAN_GENIUS_ACCESS_TOKEN=your_access_token_here
4
+
5
+ # Optional: Cache settings
6
+ # BARSCAN_CACHE_DIR=~/.cache/barscan
7
+ # BARSCAN_CACHE_TTL_HOURS=168
@@ -0,0 +1,37 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.11", "3.12"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Set up Python ${{ matrix.python-version }}
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+
24
+ - name: Install dependencies
25
+ run: pip install -e ".[dev]"
26
+
27
+ - name: Lint
28
+ run: ruff check src/
29
+
30
+ - name: Format check
31
+ run: ruff format --check src/
32
+
33
+ - name: Type check
34
+ run: mypy src/barscan/ --ignore-missing-imports
35
+
36
+ - name: Test
37
+ run: pytest
@@ -0,0 +1,72 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+
8
+ jobs:
9
+ test:
10
+ runs-on: ubuntu-latest
11
+ strategy:
12
+ matrix:
13
+ python-version: ["3.11", "3.12"]
14
+
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - name: Set up Python ${{ matrix.python-version }}
19
+ uses: actions/setup-python@v5
20
+ with:
21
+ python-version: ${{ matrix.python-version }}
22
+
23
+ - name: Install dependencies
24
+ run: pip install -e ".[dev]"
25
+
26
+ - name: Lint
27
+ run: ruff check src/
28
+
29
+ - name: Format check
30
+ run: ruff format --check src/
31
+
32
+ - name: Type check
33
+ run: mypy src/barscan/ --ignore-missing-imports
34
+
35
+ - name: Test
36
+ run: pytest
37
+
38
+ release:
39
+ needs: test
40
+ runs-on: ubuntu-latest
41
+ permissions:
42
+ contents: write
43
+ id-token: write
44
+ environment:
45
+ name: pypi
46
+ url: https://pypi.org/p/barscan
47
+
48
+ steps:
49
+ - uses: actions/checkout@v4
50
+
51
+ - name: Set up Python
52
+ uses: actions/setup-python@v5
53
+ with:
54
+ python-version: "3.12"
55
+
56
+ - name: Install build dependencies
57
+ run: pip install build
58
+
59
+ - name: Build package
60
+ run: python -m build
61
+
62
+ - name: Create GitHub Release
63
+ env:
64
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
65
+ run: |
66
+ gh release create ${{ github.ref_name }} \
67
+ --title "Release ${{ github.ref_name }}" \
68
+ --generate-notes \
69
+ dist/*
70
+
71
+ - name: Publish to PyPI
72
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,84 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ *.egg-info/
24
+ .installed.cfg
25
+ *.egg
26
+
27
+ # PyInstaller
28
+ *.manifest
29
+ *.spec
30
+
31
+ # Installer logs
32
+ pip-log.txt
33
+ pip-delete-this-directory.txt
34
+
35
+ # Unit test / coverage reports
36
+ htmlcov/
37
+ .tox/
38
+ .nox/
39
+ .coverage
40
+ .coverage.*
41
+ .cache
42
+ nosetests.xml
43
+ coverage.xml
44
+ *.cover
45
+ *.py,cover
46
+ .hypothesis/
47
+ .pytest_cache/
48
+
49
+ # Translations
50
+ *.mo
51
+ *.pot
52
+
53
+ # Environments
54
+ .env
55
+ .venv
56
+ env/
57
+ venv/
58
+ ENV/
59
+ env.bak/
60
+ venv.bak/
61
+
62
+ # mypy
63
+ .mypy_cache/
64
+ .dmypy.json
65
+ dmypy.json
66
+
67
+ # ruff
68
+ .ruff_cache/
69
+
70
+ # IDE
71
+ .idea/
72
+ .vscode/
73
+ *.swp
74
+ *.swo
75
+ *~
76
+
77
+ # OS
78
+ .DS_Store
79
+ Thumbs.db
80
+
81
+ # Project specific
82
+ .cache/
83
+ *.log
84
+ CLAUDE.md
barscan-0.2.2/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 shimpeiws
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
barscan-0.2.2/PKG-INFO ADDED
@@ -0,0 +1,354 @@
1
+ Metadata-Version: 2.4
2
+ Name: barscan
3
+ Version: 0.2.2
4
+ Summary: Lyrics word frequency analyzer using Genius API
5
+ Project-URL: Homepage, https://github.com/shimpeiws/barscan
6
+ Project-URL: Repository, https://github.com/shimpeiws/barscan.git
7
+ Project-URL: Issues, https://github.com/shimpeiws/barscan/issues
8
+ Project-URL: Changelog, https://github.com/shimpeiws/barscan/releases
9
+ Project-URL: WordGrain Schema, https://github.com/shimpeiws/word-grain
10
+ Author-email: shimpeiws <shimpeiws@gmail.com>
11
+ License: MIT
12
+ License-File: LICENSE
13
+ Keywords: cli,genius,lyrics,nlp,word-frequency
14
+ Classifier: Development Status :: 3 - Alpha
15
+ Classifier: Environment :: Console
16
+ Classifier: Intended Audience :: End Users/Desktop
17
+ Classifier: License :: OSI Approved :: MIT License
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Text Processing :: Linguistic
21
+ Requires-Python: >=3.11
22
+ Requires-Dist: lyricsgenius>=3.0.1
23
+ Requires-Dist: nltk>=3.9.0
24
+ Requires-Dist: pydantic-settings>=2.1.0
25
+ Requires-Dist: pydantic>=2.5.0
26
+ Requires-Dist: rich>=13.7.0
27
+ Requires-Dist: typer[all]>=0.15.0
28
+ Provides-Extra: dev
29
+ Requires-Dist: mypy>=1.8.0; extra == 'dev'
30
+ Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
31
+ Requires-Dist: pytest-mock>=3.12.0; extra == 'dev'
32
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
33
+ Requires-Dist: ruff>=0.4.0; extra == 'dev'
34
+ Requires-Dist: types-requests>=2.31.0; extra == 'dev'
35
+ Provides-Extra: japanese
36
+ Requires-Dist: janome>=0.5.0; extra == 'japanese'
37
+ Requires-Dist: stopwordsiso>=0.6.1; extra == 'japanese'
38
+ Description-Content-Type: text/markdown
39
+
40
+ # BarScan
41
+
42
+ A Python CLI tool that analyzes word frequency in song lyrics using the Genius API.
43
+
44
+ ## Features
45
+
46
+ - Fetch lyrics for any artist from the Genius API
47
+ - Analyze word frequency across multiple songs
48
+ - Natural language processing with NLTK for accurate tokenization
49
+ - Customizable stop word filtering and exclusions
50
+ - Multiple output formats: table, JSON, CSV, and WordGrain
51
+ - Local caching to reduce API calls and improve performance
52
+ - Retry logic with exponential backoff for robust API communication
53
+
54
+ ## Installation
55
+
56
+ ### Prerequisites
57
+
58
+ - Python 3.11 or higher
59
+ - pip (latest version recommended)
60
+
61
+ ### From PyPI
62
+
63
+ ```bash
64
+ pip install barscan
65
+ ```
66
+
67
+ ### From Source
68
+
69
+ ```bash
70
+ git clone https://github.com/shimpeiws/barscan.git
71
+ cd barscan
72
+ pip install -e ".[dev]"
73
+ ```
74
+
75
+ ## Setup
76
+
77
+ ### Getting a Genius API Token
78
+
79
+ 1. Go to [Genius API Clients](https://genius.com/api-clients)
80
+ 2. Sign in with your Genius account (or create one)
81
+ 3. Click "Create an API Client"
82
+ 4. Fill in the app details:
83
+ - App Name: Any name (e.g., "BarScan CLI")
84
+ - App Website URL: Any URL (e.g., your GitHub profile)
85
+ - Redirect URI: Leave default or use `http://localhost`
86
+ 5. Click "Save"
87
+ 6. Copy the "Client Access Token" (not the Client ID or Secret)
88
+
89
+ ### Configuring the Token
90
+
91
+ Set the token as an environment variable:
92
+
93
+ ```bash
94
+ export BARSCAN_GENIUS_ACCESS_TOKEN=your_token_here
95
+ ```
96
+
97
+ Or create a `.env` file in your project directory:
98
+
99
+ ```bash
100
+ BARSCAN_GENIUS_ACCESS_TOKEN=your_token_here
101
+ ```
102
+
103
+ ## Usage
104
+
105
+ ### Basic Analysis
106
+
107
+ Analyze the most common words in an artist's lyrics:
108
+
109
+ ```bash
110
+ barscan analyze "Kendrick Lamar"
111
+ ```
112
+
113
+ ### Command Options
114
+
115
+ ```bash
116
+ # Analyze more songs
117
+ barscan analyze "Drake" --max-songs 20
118
+
119
+ # Show more words in results
120
+ barscan analyze "J. Cole" --top 100
121
+
122
+ # Combine options
123
+ barscan analyze "Tyler, The Creator" -n 15 -t 50
124
+ ```
125
+
126
+ ### Output Formats
127
+
128
+ ```bash
129
+ # Default table format (console)
130
+ barscan analyze "Beyonce"
131
+
132
+ # JSON format
133
+ barscan analyze "Beyonce" --format json
134
+
135
+ # CSV format
136
+ barscan analyze "Beyonce" --format csv
137
+
138
+ # WordGrain format (structured JSON schema)
139
+ barscan analyze "Beyonce" --format wordgrain
140
+
141
+ # Save to file
142
+ barscan analyze "Beyonce" --format json --output results.json
143
+ ```
144
+
145
+ ### Filtering Options
146
+
147
+ ```bash
148
+ # Disable stop word filtering (include "the", "a", "is", etc.)
149
+ barscan analyze "Eminem" --no-stop-words
150
+
151
+ # Exclude specific words
152
+ barscan analyze "Eminem" --exclude "yeah" --exclude "oh"
153
+
154
+ # Combine exclusions
155
+ barscan analyze "Eminem" -e "uh" -e "like" -e "yo"
156
+ ```
157
+
158
+ ### Cache Management
159
+
160
+ BarScan caches lyrics locally to reduce API calls:
161
+
162
+ ```bash
163
+ # Clear all cached lyrics
164
+ barscan clear-cache --force
165
+
166
+ # Clear only expired cache entries
167
+ barscan clear-cache --expired-only --force
168
+
169
+ # Interactive confirmation (without --force)
170
+ barscan clear-cache
171
+ ```
172
+
173
+ ### View Configuration
174
+
175
+ ```bash
176
+ # Show current configuration and cache statistics
177
+ barscan config
178
+ ```
179
+
180
+ ## Configuration Options
181
+
182
+ All settings can be configured via environment variables with the `BARSCAN_` prefix:
183
+
184
+ | Variable | Description | Default |
185
+ |----------|-------------|---------|
186
+ | `BARSCAN_GENIUS_ACCESS_TOKEN` | Genius API access token | (required) |
187
+ | `BARSCAN_CACHE_DIR` | Directory for caching lyrics | `~/.cache/barscan` |
188
+ | `BARSCAN_CACHE_TTL_HOURS` | Cache time-to-live in hours | `168` (7 days) |
189
+ | `BARSCAN_DEFAULT_MAX_SONGS` | Default number of songs to analyze | `10` |
190
+ | `BARSCAN_DEFAULT_TOP_WORDS` | Default number of top words to show | `50` |
191
+
192
+ ## Output Formats
193
+
194
+ ### Table Format (default)
195
+
196
+ Human-readable table with word rankings:
197
+
198
+ ```
199
+ Artist: Kendrick Lamar
200
+ Songs analyzed: 10
201
+ Total words: 5,432
202
+ Unique words: 1,203
203
+
204
+ Word Frequencies
205
+ ┌──────┬─────────┬───────┬────────────┐
206
+ │ Rank │ Word │ Count │ Percentage │
207
+ ├──────┼─────────┼───────┼────────────┤
208
+ │ 1 │ love │ 87 │ 1.60% │
209
+ │ 2 │ know │ 65 │ 1.20% │
210
+ │ ... │ ... │ ... │ ... │
211
+ └──────┴─────────┴───────┴────────────┘
212
+ ```
213
+
214
+ ### JSON Format
215
+
216
+ Structured JSON for programmatic use:
217
+
218
+ ```json
219
+ {
220
+ "artist": "Kendrick Lamar",
221
+ "songs_analyzed": 10,
222
+ "total_words": 5432,
223
+ "unique_words": 1203,
224
+ "frequencies": [
225
+ {"word": "love", "count": 87, "percentage": 1.60},
226
+ {"word": "know", "count": 65, "percentage": 1.20}
227
+ ]
228
+ }
229
+ ```
230
+
231
+ ### CSV Format
232
+
233
+ Comma-separated values for spreadsheet import:
234
+
235
+ ```csv
236
+ word,count,percentage
237
+ love,87,1.60
238
+ know,65,1.20
239
+ ```
240
+
241
+ ### WordGrain Format
242
+
243
+ [WordGrain](https://github.com/shimpeiws/word-grain) is a standardized JSON schema for vocabulary analysis data. It enables interoperability between different word frequency analysis tools.
244
+
245
+ Output example:
246
+
247
+ ```json
248
+ {
249
+ "$schema": "https://raw.githubusercontent.com/shimpeiws/word-grain/main/schema/v0.1.0/wordgrain.schema.json",
250
+ "meta": {
251
+ "source": "genius",
252
+ "artist": "Kendrick Lamar",
253
+ "generated_at": "2024-01-15T10:30:00Z",
254
+ "corpus_size": 10,
255
+ "total_words": 5432,
256
+ "generator": "barscan/0.2.2",
257
+ "language": "en"
258
+ },
259
+ "grains": [
260
+ {"word": "love", "frequency": 87, "frequency_normalized": 160.18}
261
+ ]
262
+ }
263
+ ```
264
+
265
+ ## Development
266
+
267
+ ### Setup
268
+
269
+ ```bash
270
+ # Clone repository
271
+ git clone https://github.com/shimpeiws/barscan.git
272
+ cd barscan
273
+
274
+ # Install with development dependencies
275
+ pip install -e ".[dev]"
276
+ ```
277
+
278
+ ### Running Tests
279
+
280
+ ```bash
281
+ # Run all tests with coverage
282
+ pytest
283
+
284
+ # Run specific test file
285
+ pytest tests/test_genius/test_client.py -v
286
+
287
+ # Run specific test
288
+ pytest tests/test_genius/test_client.py::TestSearchArtist::test_search_artist_success -v
289
+ ```
290
+
291
+ ### Code Quality
292
+
293
+ ```bash
294
+ # Lint code
295
+ ruff check src/
296
+
297
+ # Format code
298
+ ruff format src/
299
+
300
+ # Type check
301
+ mypy src/barscan/ --ignore-missing-imports
302
+ ```
303
+
304
+ ## Architecture
305
+
306
+ ```
307
+ src/barscan/
308
+ ├── cli.py # Typer CLI entry point (barscan command)
309
+ ├── config.py # Pydantic Settings configuration
310
+ ├── exceptions.py # Exception hierarchy (BarScanError base)
311
+ ├── genius/ # Genius API integration
312
+ │ ├── models.py # Pydantic models (Artist, Song, Lyrics)
313
+ │ ├── client.py # GeniusClient with retry logic
314
+ │ └── cache.py # File-based lyrics cache with TTL
315
+ ├── analyzer/ # Word frequency analysis
316
+ │ ├── models.py # Analysis result models
317
+ │ ├── processor.py # Text preprocessing with NLTK
318
+ │ ├── filters.py # Stop word and length filtering
319
+ │ └── frequency.py # Word counting and aggregation
320
+ └── output/ # Result formatting
321
+ └── wordgrain.py # WordGrain schema export
322
+ ```
323
+
324
+ ## Troubleshooting
325
+
326
+ ### "Genius API token not configured"
327
+
328
+ Make sure you've set the `BARSCAN_GENIUS_ACCESS_TOKEN` environment variable or created a `.env` file with the token.
329
+
330
+ ### "Artist not found"
331
+
332
+ - Check the spelling of the artist name
333
+ - Try using the artist's name exactly as it appears on Genius
334
+ - Some artists may have limited or no presence on Genius
335
+
336
+ ### Rate Limiting
337
+
338
+ BarScan includes automatic retry logic with exponential backoff. If you encounter rate limiting:
339
+
340
+ - The tool will automatically retry failed requests
341
+ - Consider reducing `--max-songs` for large analyses
342
+ - Cached lyrics won't trigger new API calls
343
+
344
+ ### Empty Results
345
+
346
+ If no words appear in results after filtering:
347
+
348
+ - Try `--no-stop-words` to include common words
349
+ - Check if the artist has lyrics available on Genius
350
+ - Some songs may be instrumental or have no lyrics
351
+
352
+ ## License
353
+
354
+ MIT