bbstrader-0.3.1-py3-none-any.whl → bbstrader-0.3.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bbstrader might be problematic. (The original page linked to more details here; the link target was not preserved in this text.)

bbstrader/models/nlp.py CHANGED
@@ -2,15 +2,17 @@ import contextlib
2
2
  import os
3
3
  import re
4
4
  import time
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
5
6
  from datetime import datetime
6
7
  from typing import Dict, List, Tuple
7
8
 
8
9
  import dash
10
+ import en_core_web_sm
9
11
  import matplotlib.pyplot as plt
10
12
  import nltk
11
13
  import pandas as pd
12
14
  import plotly.express as px
13
- import en_core_web_sm
15
+ from bbstrader.core.data import FinancialNews
14
16
  from dash import dcc, html
15
17
  from dash.dependencies import Input, Output
16
18
  from nltk.corpus import stopwords
@@ -18,12 +20,10 @@ from nltk.tokenize import word_tokenize
18
20
  from textblob import TextBlob
19
21
  from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
20
22
 
21
- from bbstrader.core.data import FinancialNews
22
-
23
-
24
23
  __all__ = [
25
24
  "TopicModeler",
26
25
  "SentimentAnalyzer",
26
+ "LEXICON",
27
27
  "EQUITY_LEXICON",
28
28
  "FOREX_LEXICON",
29
29
  "COMMODITIES_LEXICON",
@@ -331,6 +331,17 @@ FINANCIAL_LEXICON = {
331
331
  **BONDS_LEXICON,
332
332
  }
333
333
 
334
+ LEXICON = {
335
+ "stock": EQUITY_LEXICON,
336
+ "etf": EQUITY_LEXICON,
337
+ "future": FINANCIAL_LEXICON,
338
+ "forex": FOREX_LEXICON,
339
+ "crypto": CRYPTO_LEXICON,
340
+ "index": EQUITY_LEXICON,
341
+ "bond": BONDS_LEXICON,
342
+ "commodity": COMMODITIES_LEXICON,
343
+ }
344
+
334
345
 
335
346
  class TopicModeler(object):
336
347
  def __init__(self):
@@ -345,7 +356,7 @@ class TopicModeler(object):
345
356
  "SpaCy model 'en_core_web_sm' not found. "
346
357
  "Please install it using 'python -m spacy download en_core_web_sm'."
347
358
  )
348
-
359
+
349
360
  def preprocess_texts(self, texts: list[str]):
350
361
  def clean_doc(Doc):
351
362
  doc = []
@@ -379,11 +390,6 @@ class SentimentAnalyzer(object):
379
390
  analysis using VADER (SentimentIntensityAnalyzer) and optional TextBlob
380
391
  for enhanced polarity scoring.
381
392
 
382
- Attributes:
383
- nlp (spacy.Language): A SpaCy NLP pipeline for tokenization and lemmatization,
384
- with Named Entity Recognition (NER) disabled.
385
- analyzer (SentimentIntensityAnalyzer): An instance of VADER's sentiment analyzer
386
- for financial sentiment scoring.
387
393
  """
388
394
 
389
395
  def __init__(self):
@@ -395,8 +401,6 @@ class SentimentAnalyzer(object):
395
401
  - Loads the `en_core_web_sm` SpaCy model with Named Entity Recognition (NER) disabled.
396
402
  - Initializes VADER's SentimentIntensityAnalyzer for sentiment scoring.
397
403
 
398
- Args:
399
- use_spacy (bool): If True, uses SpaCy for lemmatization. Defaults to False.
400
404
  """
401
405
  nltk.download("punkt", quiet=True)
402
406
  nltk.download("stopwords", quiet=True)
@@ -431,7 +435,9 @@ class SentimentAnalyzer(object):
431
435
  str: The cleaned and lemmatized text.
432
436
  """
433
437
  if not isinstance(text, str):
434
- raise ValueError(f"{self.__class__.__name__}: preprocess_text expects a string, got {type(text)}")
438
+ raise ValueError(
439
+ f"{self.__class__.__name__}: preprocess_text expects a string, got {type(text)}"
440
+ )
435
441
  text = text.lower()
436
442
  text = re.sub(r"http\S+", "", text)
437
443
  text = re.sub(r"[^a-zA-Z\s]", "", text)
@@ -479,8 +485,96 @@ class SentimentAnalyzer(object):
479
485
  )
480
486
  return avg_sentiment
481
487
 
488
+ def _get_sentiment_for_one_ticker(
489
+ self,
490
+ ticker: str,
491
+ asset_type: str,
492
+ lexicon=None,
493
+ top_news=10,
494
+ **kwargs,
495
+ ) -> float:
496
+ rd_params = {"client_id", "client_secret", "user_agent"}
497
+ fm_params = {"start", "end", "page", "limit"}
498
+
499
+ # 1. Collect data from all sources
500
+ yahoo_news = self.news.get_yahoo_finance_news(
501
+ ticker, asset_type=asset_type, n_news=top_news
502
+ )
503
+ google_news = self.news.get_google_finance_news(
504
+ ticker, asset_type=asset_type, n_news=top_news
505
+ )
506
+
507
+ reddit_posts = []
508
+ if all(kwargs.get(rd) for rd in rd_params):
509
+ reddit_posts = self.news.get_reddit_posts(
510
+ ticker,
511
+ n_posts=top_news,
512
+ **{k: kwargs.get(k) for k in rd_params},
513
+ )
514
+
515
+ coindesk_news = self.news.get_coindesk_news(query=ticker, list_of_str=True)
516
+
517
+ fmp_source_news = []
518
+ if kwargs.get("fmp_api"):
519
+ fmp_news_client = self.news.get_fmp_news(kwargs.get("fmp_api"))
520
+ for src in ["articles"]:
521
+ try:
522
+ source_news = fmp_news_client.get_news(
523
+ ticker,
524
+ source=src,
525
+ symbol=ticker,
526
+ **{k: kwargs.get(k) for k in fm_params},
527
+ )
528
+ fmp_source_news.extend(source_news)
529
+ except Exception:
530
+ continue
531
+
532
+ # 2. Analyze sentiment for each source
533
+ news_sentiment = self.analyze_sentiment(
534
+ yahoo_news + google_news, lexicon=lexicon
535
+ )
536
+ reddit_sentiment = self.analyze_sentiment(
537
+ reddit_posts, lexicon=lexicon, textblob=True
538
+ )
539
+ fmp_sentiment = self.analyze_sentiment(
540
+ fmp_source_news, lexicon=lexicon, textblob=True
541
+ )
542
+ coindesk_sentiment = self.analyze_sentiment(
543
+ coindesk_news, lexicon=lexicon, textblob=True
544
+ )
545
+
546
+ # 3. Compute weighted average sentiment score
547
+ sentiments = [
548
+ news_sentiment,
549
+ reddit_sentiment,
550
+ fmp_sentiment,
551
+ coindesk_sentiment,
552
+ ]
553
+ # Count how many sources provided data to get a proper average
554
+ num_sources = sum(
555
+ 1
556
+ for source_data in [
557
+ yahoo_news + google_news,
558
+ reddit_posts,
559
+ fmp_source_news,
560
+ coindesk_news,
561
+ ]
562
+ if source_data
563
+ )
564
+
565
+ if num_sources == 0:
566
+ return 0.0
567
+
568
+ overall_sentiment = sum(sentiments) / num_sources
569
+ return overall_sentiment
570
+
482
571
  def get_sentiment_for_tickers(
483
- self, tickers: List[str] | List[Tuple[str, str]], lexicon=None, asset_type="stock", top_news=10, **kwargs
572
+ self,
573
+ tickers: List[str] | List[Tuple[str, str]],
574
+ lexicon=None,
575
+ asset_type="stock",
576
+ top_news=10,
577
+ **kwargs,
484
578
  ) -> Dict[str, float]:
485
579
  """
486
580
  Computes sentiment scores for a list of financial tickers based on news and social media data.
@@ -501,7 +595,7 @@ class SentimentAnalyzer(object):
501
595
  - if using tuples, the first element is the ticker and the second is the asset type.
502
596
  - if using a single string, the asset type must be specified or the default is "stock".
503
597
  lexicon (dict, optional): A custom sentiment lexicon to update VADER's default lexicon.
504
- asset_type (str, optional): The type of asset, Defaults to "stock",
598
+ asset_type (str, optional): The type of asset, Defaults to "stock",
505
599
  supported types include:
506
600
  - "stock": Stock symbols (e.g., AAPL, MSFT)
507
601
  - "etf": Exchange-traded funds (e.g., SPY, QQQ)
@@ -522,82 +616,58 @@ class SentimentAnalyzer(object):
522
616
  Notes:
523
617
  The tickers names must follow yahoo finance conventions.
524
618
  """
619
+
525
620
  sentiment_results = {}
526
- rd_params = {"client_id", "client_secret", "user_agent"}
527
- fm_params = {"start", "end", "page", "limit"}
528
- with open(os.devnull, 'w') as devnull:
529
- with contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(devnull):
530
- for asset in tickers:
531
- if isinstance(asset, tuple):
532
- ticker, asset_type = asset
533
- if asset_type not in [
534
- "stock",
535
- "etf",
536
- "future",
537
- "forex",
538
- "crypto",
539
- "index",
540
- ]:
541
- raise ValueError(
542
- f"Unsupported asset type '{asset_type}'. "
543
- "Supported types: stock, etf, future, forex, crypto, index."
544
- )
545
- # Collect data
546
- sources = 0
547
- yahoo_news = self.news.get_yahoo_finance_news(
548
- ticker, asset_type=asset_type, n_news=top_news
549
- )
550
- google_news = self.news.get_google_finance_news(
551
- ticker, asset_type=asset_type, n_news=top_news
552
- )
553
- reddit_posts = []
554
- if all(kwargs.get(rd) for rd in rd_params):
555
- reddit_posts = self.news.get_reddit_posts(
556
- ticker, n_posts=top_news, **{k: kwargs.get(k) for k in rd_params}
557
- )
558
- coindesk_news = self.news.get_coindesk_news(query=ticker, list_of_str=True)
559
- fmp_source_news = []
560
- if kwargs.get("fmp_api"):
561
- fmp_news = self.news.get_fmp_news(kwargs.get("fmp_api"))
562
- for src in ["articles"]: # , "releases", asset_type]:
563
- try:
564
- source_news = fmp_news.get_news(
565
- ticker, source=src, symbol=ticker, **{k: kwargs.get(k) for k in fm_params}
566
- )
567
- fmp_source_news += source_news
568
- except Exception:
569
- continue
570
- if any([len(s) > 0 for s in [yahoo_news, google_news]]):
571
- sources += 1
572
- for source in [reddit_posts, fmp_source_news, coindesk_news]:
573
- if len(source) > 0:
574
- sources += 1
575
- # Compute sentiment
576
- news_sentiment = self.analyze_sentiment(
577
- yahoo_news + google_news, lexicon=lexicon
578
- )
579
- reddit_sentiment = self.analyze_sentiment(
580
- reddit_posts, lexicon=lexicon, textblob=True
581
- )
582
- fmp_sentiment = self.analyze_sentiment(
583
- fmp_source_news, lexicon=lexicon, textblob=True
584
- )
585
- coindesk_sentiment = self.analyze_sentiment(
586
- coindesk_news, lexicon=lexicon, textblob=True
587
- )
588
621
 
589
- # Weighted average sentiment score
590
- if sources != 0:
591
- overall_sentiment = (
592
- news_sentiment
593
- + reddit_sentiment
594
- + fmp_sentiment
595
- + coindesk_sentiment
596
- ) / sources
597
- else:
598
- overall_sentiment = 0.0
599
- sentiment_results[ticker] = overall_sentiment
600
- time.sleep(1) # To avoid hitting API rate limits
622
+ # Suppress stdout/stderr from underlying libraries during execution
623
+ with open(os.devnull, "w") as devnull:
624
+ with (
625
+ contextlib.redirect_stdout(devnull),
626
+ contextlib.redirect_stderr(devnull),
627
+ ):
628
+ with ThreadPoolExecutor() as executor:
629
+ # Map each future to its ticker for easy result lookup
630
+ future_to_ticker = {}
631
+ for ticker_info in tickers:
632
+ # Normalize input to (ticker, asset_type)
633
+ if isinstance(ticker_info, tuple):
634
+ ticker_symbol, ticker_asset_type = ticker_info
635
+ else:
636
+ ticker_symbol, ticker_asset_type = ticker_info, asset_type
637
+
638
+ if ticker_asset_type not in [
639
+ "stock",
640
+ "etf",
641
+ "future",
642
+ "forex",
643
+ "crypto",
644
+ "index",
645
+ ]:
646
+ raise ValueError(
647
+ f"Unsupported asset type '{ticker_asset_type}' for {ticker_symbol}."
648
+ )
649
+
650
+ # Submit the job to the thread pool
651
+ future = executor.submit(
652
+ self._get_sentiment_for_one_ticker,
653
+ ticker=ticker_symbol,
654
+ asset_type=ticker_asset_type,
655
+ lexicon=lexicon,
656
+ top_news=top_news,
657
+ **kwargs,
658
+ )
659
+ future_to_ticker[future] = ticker_symbol
660
+
661
+ # Collect results as they are completed
662
+ for future in as_completed(future_to_ticker):
663
+ ticker_symbol = future_to_ticker[future]
664
+ try:
665
+ sentiment_score = future.result()
666
+ sentiment_results[ticker_symbol] = sentiment_score
667
+ except Exception:
668
+ sentiment_results[ticker_symbol] = (
669
+ 0.0 # Assign a neutral score on error
670
+ )
601
671
 
602
672
  return sentiment_results
603
673
 
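[Note: the hunk below belongs to a different file than bbstrader/models/nlp.py — its line numbers restart at 153 — but its "CHANGED" file header was lost in extraction; presumably bbstrader/models/optimization.py, unverified.]
@@ -153,7 +153,7 @@ def optimized_weights(prices=None, returns=None, rfr=0.0, freq=252, method="equa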
153
153
  freq : int, optional
154
154
  Number of days for calculating portfolio weights, such as 252 for a year's worth of daily returns (default is 252).
155
155
  method : str, optional
156
- Optimization method to use ('markowitz', 'hrp', or 'equal') (default is 'markowitz').
156
+ Optimization method to use ('markowitz', 'hrp', or 'equal') (default is 'equal').
157
157
 
158
158
  Returns
159
159
  -------